feat(parser): Allow primitive data types to be parsed using TypeAdapter #4502

Merged · 21 commits · Jun 26, 2024
13 changes: 7 additions & 6 deletions aws_lambda_powertools/utilities/parser/__init__.py
@@ -1,10 +1,11 @@
"""Advanced event_parser utility
"""

from . import envelopes
from .envelopes import BaseEnvelope
from .parser import event_parser, parse
from .pydantic import BaseModel, Field, ValidationError, root_validator, validator
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator

from aws_lambda_powertools.utilities.parser import envelopes
from aws_lambda_powertools.utilities.parser.envelopes import BaseEnvelope
from aws_lambda_powertools.utilities.parser.parser import event_parser, parse

__all__ = [
"event_parser",
@@ -13,7 +14,7 @@
"BaseEnvelope",
"BaseModel",
"Field",
"validator",
"root_validator",
"field_validator",
"model_validator",
"ValidationError",
]
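As a quick orientation for users of the re-exported names, here is a minimal sketch (the `Order` model and its fields are illustrative, not taken from this PR) of how validator code migrates from the old re-exports to the Pydantic v2 names now listed in `__all__`:

```python
from aws_lambda_powertools.utilities.parser import BaseModel, field_validator, model_validator


class Order(BaseModel):
    order_id: int
    description: str

    # Pydantic v1's @validator("description", pre=True) becomes:
    @field_validator("description", mode="before")
    def strip_description(cls, value):
        return str(value).strip()

    # Pydantic v1's @root_validator becomes:
    @model_validator(mode="after")
    def ensure_positive_id(self):
        assert self.order_id > 0, "order_id must be positive"
        return self
```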
34 changes: 0 additions & 34 deletions aws_lambda_powertools/utilities/parser/compat.py

This file was deleted.

19 changes: 12 additions & 7 deletions aws_lambda_powertools/utilities/parser/envelopes/base.py
@@ -1,8 +1,11 @@
from __future__ import annotations

import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional, Type, TypeVar, Union
from typing import Any, Dict, Optional, TypeVar, Union

from aws_lambda_powertools.utilities.parser.types import Model
from aws_lambda_powertools.utilities.parser.functions import _retrieve_or_set_model_from_cache
from aws_lambda_powertools.utilities.parser.types import T

logger = logging.getLogger(__name__)

@@ -11,14 +14,14 @@ class BaseEnvelope(ABC):
"""ABC implementation for creating a supported Envelope"""

@staticmethod
def _parse(data: Optional[Union[Dict[str, Any], Any]], model: Type[Model]) -> Union[Model, None]:
def _parse(data: Optional[Union[Dict[str, Any], Any]], model: type[T]) -> Union[T, None]:
"""Parses envelope data against model provided

Parameters
----------
data : Dict
Data to be parsed and validated
model : Type[Model]
model : type[T]
Data model to parse and validate data against

Returns
@@ -30,15 +33,17 @@ def _parse(data: Optional[Union[Dict[str, Any], Any]], model: Type[Model]) -> Un
logger.debug("Skipping parsing as event is None")
return data

adapter = _retrieve_or_set_model_from_cache(model=model)

logger.debug("parsing event against model")
if isinstance(data, str):
logger.debug("parsing event as string")
return model.model_validate_json(data)
return adapter.validate_json(data)

return model.model_validate(data)
return adapter.validate_python(data)

@abstractmethod
def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: Type[Model]):
def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: type[T]):
"""Implementation to parse data against envelope model, then against the data model

NOTE: Call `_parse` method to fully parse data with model provided.
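For illustration, a rough sketch of a custom envelope built on the updated `_parse`; the `MyWrapperEnvelope` class and its `payload` key are hypothetical and only show that `model` may now be a `BaseModel` subclass or a primitive type:

```python
from typing import Any, Dict, Optional, Union

from pydantic import BaseModel

from aws_lambda_powertools.utilities.parser import parse
from aws_lambda_powertools.utilities.parser.envelopes import BaseEnvelope


class Order(BaseModel):
    order_id: int


class MyWrapperEnvelope(BaseEnvelope):
    """Hypothetical envelope unwrapping a top-level "payload" key before delegating to _parse."""

    def parse(self, data: Optional[Union[Dict[str, Any], Any]], model: type):
        # _parse now builds (or reuses) a cached TypeAdapter for `model`
        return self._parse(data=data.get("payload") if data else None, model=model)


# Usage sketch: the inner payload is validated against Order
order = parse(event={"payload": {"order_id": 1}}, model=Order, envelope=MyWrapperEnvelope)
```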
36 changes: 36 additions & 0 deletions aws_lambda_powertools/utilities/parser/functions.py
@@ -0,0 +1,36 @@
from __future__ import annotations

from pydantic import TypeAdapter

from aws_lambda_powertools.shared.cache_dict import LRUDict
from aws_lambda_powertools.utilities.parser.types import T

CACHE_TYPE_ADAPTER = LRUDict(max_items=1024)


def _retrieve_or_set_model_from_cache(model: type[T]) -> TypeAdapter:
"""
Retrieves or sets a TypeAdapter instance from the cache for the given model.

If the model is already present in the cache, the corresponding TypeAdapter
instance is returned. Otherwise, a new TypeAdapter instance is created,
stored in the cache, and returned.

Parameters
----------
model: type[T]
The model type for which the TypeAdapter instance should be retrieved or set.

Returns
-------
TypeAdapter
The TypeAdapter instance for the given model,
either retrieved from the cache or newly created and stored in the cache.
"""
id_model = id(model)

if id_model in CACHE_TYPE_ADAPTER:
return CACHE_TYPE_ADAPTER[id_model]

CACHE_TYPE_ADAPTER[id_model] = TypeAdapter(model)
return CACHE_TYPE_ADAPTER[id_model]
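A small illustration of the cache above: repeated lookups for the same model return the same `TypeAdapter` instance, while a different model gets its own entry (a sketch only; `_retrieve_or_set_model_from_cache` is a private helper and may change):

```python
from typing import Dict

from aws_lambda_powertools.utilities.parser.functions import _retrieve_or_set_model_from_cache

# First call builds and caches a TypeAdapter keyed by id(model);
# the second call returns the cached instance instead of rebuilding it.
adapter_one = _retrieve_or_set_model_from_cache(model=Dict[str, str])
adapter_two = _retrieve_or_set_model_from_cache(model=Dict[str, str])
assert adapter_one is adapter_two

# A different model type produces a separate adapter.
assert _retrieve_or_set_model_from_cache(model=bool) is not adapter_one
```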
48 changes: 32 additions & 16 deletions aws_lambda_powertools/utilities/parser/parser.py
@@ -1,11 +1,16 @@
from __future__ import annotations

import logging
import typing
from typing import Any, Callable, Dict, Optional, Type, overload

from pydantic import PydanticSchemaGenerationError, ValidationError

from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
from aws_lambda_powertools.utilities.parser.envelopes.base import Envelope
from aws_lambda_powertools.utilities.parser.exceptions import InvalidEnvelopeError, InvalidModelTypeError
from aws_lambda_powertools.utilities.parser.types import EventParserReturnType, Model
from aws_lambda_powertools.utilities.parser.functions import _retrieve_or_set_model_from_cache
from aws_lambda_powertools.utilities.parser.types import EventParserReturnType, T
from aws_lambda_powertools.utilities.typing import LambdaContext

logger = logging.getLogger(__name__)
@@ -16,7 +21,7 @@ def event_parser(
handler: Callable[..., EventParserReturnType],
event: Dict[str, Any],
context: LambdaContext,
model: Optional[Type[Model]] = None,
model: Optional[type[T]] = None,
envelope: Optional[Type[Envelope]] = None,
**kwargs: Any,
) -> EventParserReturnType:
@@ -32,7 +37,7 @@
This is useful when you need to confirm event wrapper structure, and
b) selectively extract a portion of your payload for parsing & validation.

NOTE: If envelope is omitted, the complete event is parsed to match the model parameter BaseModel definition.
NOTE: If envelope is omitted, the complete event is parsed to match the model parameter definition.

Example
-------
@@ -66,7 +71,7 @@ def handler(event: Order, context: LambdaContext):
Lambda event to be parsed & validated
context: LambdaContext
Lambda context object
model: Model
model: Optional[type[T]]
Your data model that will replace the event.
envelope: Envelope
Optional envelope to extract the model from
@@ -93,24 +98,27 @@ def handler(event: Order, context: LambdaContext):
"or as the type hint of `event` in the handler that it wraps",
)

if envelope:
parsed_event = parse(event=event, model=model, envelope=envelope)
else:
parsed_event = parse(event=event, model=model)
try:
if envelope:
parsed_event = parse(event=event, model=model, envelope=envelope)
else:
parsed_event = parse(event=event, model=model)

logger.debug(f"Calling handler {handler.__name__}")
return handler(parsed_event, context, **kwargs)
logger.debug(f"Calling handler {handler.__name__}")
return handler(parsed_event, context, **kwargs)
except (ValidationError, AttributeError) as exc:
raise InvalidModelTypeError(f"Error: {str(exc)}. Please ensure the type you're trying to parse into is correct")


@overload
def parse(event: Dict[str, Any], model: Type[Model]) -> Model: ... # pragma: no cover
def parse(event: Dict[str, Any], model: type[T]) -> T: ... # pragma: no cover


@overload
def parse(event: Dict[str, Any], model: Type[Model], envelope: Type[Envelope]) -> Model: ... # pragma: no cover
def parse(event: Dict[str, Any], model: type[T], envelope: Type[Envelope]) -> T: ... # pragma: no cover


def parse(event: Dict[str, Any], model: Type[Model], envelope: Optional[Type[Envelope]] = None):
def parse(event: Dict[str, Any], model: type[T], envelope: Optional[Type[Envelope]] = None):
"""Standalone function to parse & validate events using Pydantic models

Typically used when you need fine-grained control over error handling compared to event_parser decorator.
@@ -176,12 +184,20 @@ def handler(event: Order, context: LambdaContext):
) from exc

try:
adapter = _retrieve_or_set_model_from_cache(model=model)

logger.debug("Parsing and validating event model; no envelope used")
if isinstance(event, str):
return model.model_validate_json(event)
return adapter.validate_json(event)

return adapter.validate_python(event)

return model.model_validate(event)
except AttributeError as exc:
# Pydantic raises PydanticSchemaGenerationError when the model is not a Pydantic model
# This is seen in the tests where we pass a non-Pydantic model type to the parser or
# when we pass a data structure that does not match the model (trying to parse a true/false/etc into a model)
except PydanticSchemaGenerationError as exc:
raise InvalidModelTypeError(f"The event supplied is unable to be validated into {type(model)}") from exc
except ValidationError as exc:
raise InvalidModelTypeError(
f"Error: {str(exc)}. Please ensure the Input model inherits from BaseModel,\n"
"and your payload adheres to the specified Input model structure.\n"
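To make the new behaviour concrete, a hedged sketch of calling the standalone `parse` function with non-`BaseModel` models, which is the point of this PR (the payloads are made up):

```python
from typing import Dict

from aws_lambda_powertools.utilities.parser import parse
from aws_lambda_powertools.utilities.parser.exceptions import InvalidModelTypeError

# Primitive and generic types are now accepted because parse() builds a TypeAdapter.
assert parse(event={"status": "confirmed"}, model=Dict[str, str]) == {"status": "confirmed"}

# String events are routed through validate_json, so JSON literals parse too.
assert parse(event="true", model=bool) is True


class NotAModel:
    """Arbitrary class: Pydantic cannot generate a schema for it."""


# PydanticSchemaGenerationError is mapped to InvalidModelTypeError.
try:
    parse(event={"x": 1}, model=NotAModel)
except InvalidModelTypeError:
    pass
```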
9 changes: 0 additions & 9 deletions aws_lambda_powertools/utilities/parser/pydantic.py

This file was deleted.

1 change: 1 addition & 0 deletions aws_lambda_powertools/utilities/parser/types.py
@@ -11,5 +11,6 @@
EventParserReturnType = TypeVar("EventParserReturnType")
AnyInheritedModel = Union[Type[BaseModel], BaseModel]
RawDictOrModel = Union[Dict[str, Any], AnyInheritedModel]
T = TypeVar("T")

__all__ = ["Json", "Literal"]
26 changes: 10 additions & 16 deletions docs/utilities/parser.md
@@ -11,27 +11,13 @@ This utility provides data parsing and deep validation using [Pydantic](https://
* Defines data in pure Python classes, then parse, validate and extract only what you want
* Built-in envelopes to unwrap, extend, and validate popular event sources payloads
* Enforces type hints at runtime with user-friendly errors
* Support for Pydantic v1 and v2
* Support for Pydantic v2

## Getting started

### Install

Powertools for AWS Lambda (Python) supports Pydantic v1 and v2. Each Pydantic version requires different dependencies before you can use Parser.

#### Using Pydantic v1

!!! info "This is not necessary if you're installing Powertools for AWS Lambda (Python) via [Lambda Layer/SAR](../index.md#lambda-layer){target="_blank"}"

Add `aws-lambda-powertools[parser]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_.

???+ warning
This will increase the compressed package size by >10MB due to the Pydantic dependency.

To reduce the impact on the package size at the expense of 30%-50% of its performance [Pydantic can also be
installed without binary files](https://pydantic-docs.helpmanual.io/install/#performance-vs-package-size-trade-off){target="_blank" rel="nofollow"}:

Pip example: `SKIP_CYTHON=1 pip install --no-binary pydantic aws-lambda-powertools[parser]`
Powertools for AWS Lambda (Python) supports Pydantic v2.

#### Using Pydantic v2

@@ -169,6 +155,14 @@ def my_function():
}
```

#### Primitive data model parsing

The parser allows you to parse events into primitive data types, such as `dict` or classes that don't inherit from `BaseModel`. The following example shows you how to parse a [`Union`](https://docs.pydantic.dev/latest/api/standard_library_types/#union):

```python
--8<-- "examples/parser/src/multiple_model_parsing.py"
```
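Beyond the included snippet, a complementary sketch (the models here are illustrative) showing the same idea with the standalone `parse` function and a discriminated `Union`:

```python
from typing import Literal, Union

from pydantic import BaseModel, Field

from aws_lambda_powertools.shared.types import Annotated
from aws_lambda_powertools.utilities.parser import parse


class Cat(BaseModel):
    animal: Literal["cat"]
    name: str


class Dog(BaseModel):
    animal: Literal["dog"]
    name: str


Animal = Annotated[Union[Cat, Dog], Field(discriminator="animal")]

# The discriminator field selects which model validates the payload
pet = parse(event={"animal": "dog", "name": "Rex"}, model=Animal)
assert isinstance(pet, Dog)
```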

### Built-in models

Parser comes with the following built-in models:
4 changes: 2 additions & 2 deletions examples/batch_processing/src/pydantic_dynamodb.py
@@ -9,7 +9,7 @@
EventType,
process_partial_response,
)
from aws_lambda_powertools.utilities.parser import BaseModel, validator
from aws_lambda_powertools.utilities.parser import BaseModel, field_validator
from aws_lambda_powertools.utilities.parser.models import (
DynamoDBStreamChangedRecordModel,
DynamoDBStreamRecordModel,
@@ -26,7 +26,7 @@ class OrderDynamoDB(BaseModel):

# auto transform json string
# so Pydantic can auto-initialize nested Order model
@validator("Message", pre=True)
@field_validator("Message", mode="before")
def transform_message_to_dict(cls, value: Dict[Literal["S"], str]):
return json.loads(value["S"])

33 changes: 33 additions & 0 deletions examples/parser/src/multiple_model_parsing.py
@@ -0,0 +1,33 @@
from typing import Any, Literal, Union

from pydantic import BaseModel, Field

from aws_lambda_powertools.shared.types import Annotated
from aws_lambda_powertools.utilities.parser import event_parser


class Cat(BaseModel):
animal: Literal["cat"]
name: str
meow: int


class Dog(BaseModel):
animal: Literal["dog"]
name: str
bark: int


Animal = Annotated[
Union[Cat, Dog],
Field(discriminator="animal"),
]


@event_parser(model=Animal)
def lambda_handler(event: Animal, _: Any) -> str:
if isinstance(event, Cat):
# we have a cat!
return f"🐈: {event.name}"

return f"🐶: {event.name}"
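As a quick sanity check, the handler above could be exercised locally along these lines (appended to the example; the event payloads are made up and `None` stands in for the Lambda context):

```python
if __name__ == "__main__":
    # event_parser validates the raw dict into Cat or Dog before calling the handler body
    assert lambda_handler({"animal": "cat", "name": "Whiskers", "meow": 3}, None) == "🐈: Whiskers"
    assert lambda_handler({"animal": "dog", "name": "Rex", "bark": 2}, None) == "🐶: Rex"
```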
Empty file added tests/e2e/parser/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions tests/e2e/parser/conftest.py
@@ -0,0 +1,19 @@
import pytest

from tests.e2e.parser.infrastructure import ParserStack


@pytest.fixture(autouse=True, scope="package")
def infrastructure():
"""Setup and teardown logic for E2E test infrastructure

Yields
------
Dict[str, str]
CloudFormation Outputs from deployed infrastructure
"""
stack = ParserStack()
try:
yield stack.deploy()
finally:
stack.delete()