Skip to content

feat: simple JSON Schema validator utility #153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Sep 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- **Utilities**: Add new `Validator` utility to validate inbound events and responses using JSON Schema

## [1.5.0] - 2020-09-04

### Added
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ target:

dev:
pip install --upgrade pip poetry pre-commit
poetry install
poetry install --extras "jmespath"
pre-commit install

dev-docs:
Expand Down
14 changes: 14 additions & 0 deletions aws_lambda_powertools/utilities/validation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Simple validator to enforce that incoming/outgoing events conform to a JSON Schema
"""

from .exceptions import InvalidEnvelopeExpressionError, InvalidSchemaFormatError, SchemaValidationError
from .validator import validate, validator

# Public names of this package: the two validation entry points and the
# exceptions imported above from .exceptions.
__all__ = [
"validate",
"validator",
"InvalidSchemaFormatError",
"SchemaValidationError",
"InvalidEnvelopeExpressionError",
]
65 changes: 65 additions & 0 deletions aws_lambda_powertools/utilities/validation/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import logging
from typing import Any, Dict

import fastjsonschema
import jmespath
from jmespath.exceptions import LexerError

from .exceptions import InvalidEnvelopeExpressionError, InvalidSchemaFormatError, SchemaValidationError
from .jmespath_functions import PowertoolsFunctions

logger = logging.getLogger(__name__)


def validate_data_against_schema(data: Dict, schema: Dict):
    """Validate dict data against given JSON Schema

    Parameters
    ----------
    data : Dict
        Data set to be validated
    schema : Dict
        JSON Schema to validate against

    Raises
    ------
    SchemaValidationError
        When schema validation fails against data set
    InvalidSchemaFormatError
        When JSON schema provided is invalid, either because it is not a
        usable object (TypeError/AttributeError while compiling it) or because
        fastjsonschema rejects its definition
    """
    try:
        fastjsonschema.validate(definition=schema, data=data)
    except (TypeError, AttributeError, fastjsonschema.JsonSchemaDefinitionException) as e:
        # JsonSchemaDefinitionException derives from JsonSchemaException, so it has to be
        # handled before the broader clause below; otherwise a broken schema would be
        # reported as a data validation failure.
        raise InvalidSchemaFormatError(f"Schema received: {schema}. Error: {e}") from e
    except fastjsonschema.JsonSchemaException as e:
        message = f"Failed schema validation. Error: {e.message}, Path: {e.path}, Data: {e.value}"  # noqa: B306, E501
        raise SchemaValidationError(message) from e


def unwrap_event_from_envelope(data: Dict, envelope: str, jmespath_options: Dict) -> Any:
    """Searches data using JMESPath expression

    Parameters
    ----------
    data : Dict
        Data set to be filtered
    envelope : str
        JMESPath expression to filter data against
    jmespath_options : Dict
        Alternative JMESPath options to be included when filtering expr.
        When empty or None, the search runs with PowertoolsFunctions
        registered as custom functions.

    Returns
    -------
    Any
        Data found using JMESPath expression given in envelope

    Raises
    ------
    InvalidEnvelopeExpressionError
        When the search raises LexerError, TypeError or UnicodeError
    """
    if not jmespath_options:
        jmespath_options = {"custom_functions": PowertoolsFunctions()}

    try:
        # Lazy %-style arguments: the message is only rendered when DEBUG is enabled
        logger.debug("Envelope detected: %s. JMESPath options: %s", envelope, jmespath_options)
        return jmespath.search(envelope, data, options=jmespath.Options(**jmespath_options))
    except (LexerError, TypeError, UnicodeError) as e:
        message = f"Failed to unwrap event from envelope using expression. Error: {e} Exp: {envelope}, Data: {data}"  # noqa: B306, E501
        # Chain explicitly so the underlying failure stays attached as __cause__
        raise InvalidEnvelopeExpressionError(message) from e
10 changes: 10 additions & 0 deletions aws_lambda_powertools/utilities/validation/envelopes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Built-in envelopes"""

# Each constant is a JMESPath expression string; the powertools_* names are
# custom functions that must be registered with JMESPath when searching.

# Applies powertools_json to the top-level `body` key
API_GATEWAY_REST = "powertools_json(body)"
# Same expression as API_GATEWAY_REST
API_GATEWAY_HTTP = API_GATEWAY_REST
# Applies powertools_json to `body` of every entry under `Records`
SQS = "Records[*].powertools_json(body)"
# Takes `Sns.Message` from the first entry under `Records` only, then applies powertools_json to it
SNS = "Records[0].Sns.Message | powertools_json(@)"
# Selects the top-level `detail` key as-is
EVENTBRIDGE = "detail"
# Same expression as EVENTBRIDGE
CLOUDWATCH_EVENTS_SCHEDULED = EVENTBRIDGE
# For every entry under `Records`: powertools_base64 on `kinesis.data`, then powertools_json on the result
KINESIS_DATA_STREAM = "Records[*].kinesis.powertools_json(powertools_base64(data))"
# powertools_base64_gzip on `awslogs.data`, powertools_json on the result, then every entry under `logEvents`
CLOUDWATCH_LOGS = "awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]"
14 changes: 14 additions & 0 deletions aws_lambda_powertools/utilities/validation/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class SchemaValidationError(Exception):
    """Raised when data fails validation against a JSON Schema."""


class InvalidSchemaFormatError(Exception):
    """Raised when the JSON Schema itself is in an invalid format."""


class InvalidEnvelopeExpressionError(Exception):
    """Raised when a JMESPath envelope expression cannot be parsed."""
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import base64
import gzip
import json

import jmespath


class PowertoolsFunctions(jmespath.functions.Functions):
    """Extra JMESPath functions for decoding string payloads.

    Each method accepts a single JMESPath ``string`` argument, as declared by
    its ``signature`` decorator.
    """

    @jmespath.functions.signature({"types": ["string"]})
    def _func_powertools_json(self, value):
        """Deserialize a JSON string into the corresponding Python object."""
        parsed = json.loads(value)
        return parsed

    @jmespath.functions.signature({"types": ["string"]})
    def _func_powertools_base64(self, value):
        """Decode a base64 string and return the decoded bytes as text."""
        raw = base64.b64decode(value)
        return raw.decode()

    @jmespath.functions.signature({"types": ["string"]})
    def _func_powertools_base64_gzip(self, value):
        """Decode a base64 string, gunzip the result and return it as text."""
        return gzip.decompress(base64.b64decode(value)).decode()
204 changes: 204 additions & 0 deletions aws_lambda_powertools/utilities/validation/validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import logging
from typing import Any, Callable, Dict, Union

from ...middleware_factory import lambda_handler_decorator
from .base import unwrap_event_from_envelope, validate_data_against_schema

logger = logging.getLogger(__name__)


@lambda_handler_decorator
def validator(
    handler: Callable,
    event: Union[Dict, str],
    context: Any,
    inbound_schema: Dict = None,
    outbound_schema: Dict = None,
    envelope: str = None,
    jmespath_options: Dict = None,
) -> Any:
    """Lambda handler decorator that validates the event and/or the response with JSON Schema

    When an envelope is given, the event is unwrapped first, and both the inbound
    validation and the handler receive the unwrapped data rather than the raw event.

    Example
    -------

    **Validate incoming event**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(inbound_schema=json_schema_dict)
        def handler(event, context):
            return event

    **Validate incoming and outgoing event**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(inbound_schema=json_schema_dict, outbound_schema=response_json_schema_dict)
        def handler(event, context):
            return event

    **Unwrap event before validating against actual payload - using built-in envelopes**

        from aws_lambda_powertools.utilities.validation import validator, envelopes

        @validator(inbound_schema=json_schema_dict, envelope=envelopes.API_GATEWAY_REST)
        def handler(event, context):
            return event

    **Unwrap event before validating against actual payload - using custom JMESPath expression**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(inbound_schema=json_schema_dict, envelope="payload[*].my_data")
        def handler(event, context):
            return event

    **Unwrap and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(inbound_schema=json_schema_dict, envelope="Records[*].powertools_json(body)")
        def handler(event, context):
            return event

    **Unwrap, decode base64 and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(
            inbound_schema=json_schema_dict,
            envelope="Records[*].kinesis.powertools_json(powertools_base64(data))",
        )
        def handler(event, context):
            return event

    **Unwrap, decompress gzip data and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validator

        @validator(
            inbound_schema=json_schema_dict,
            envelope="awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]",
        )
        def handler(event, context):
            return event

    Parameters
    ----------
    handler : Callable
        Lambda handler being decorated
    event : Dict
        Lambda event to be validated
    context : Any
        Lambda context object
    inbound_schema : Dict
        JSON Schema to validate the incoming event; validation is skipped when falsy
    outbound_schema : Dict
        JSON Schema to validate the handler response; validation is skipped when falsy
    envelope : str
        JMESPath expression used to extract the payload from the event
    jmespath_options : Dict
        Alternative JMESPath options to be included when filtering expr

    Returns
    -------
    Any
        Lambda handler response

    Raises
    ------
    SchemaValidationError
        When schema validation fails against data set
    InvalidSchemaFormatError
        When JSON schema provided is invalid
    InvalidEnvelopeExpressionError
        When JMESPath expression to unwrap event is invalid
    """
    payload = event
    if envelope:
        payload = unwrap_event_from_envelope(data=event, envelope=envelope, jmespath_options=jmespath_options)

    if inbound_schema:
        logger.debug("Validating inbound event")
        validate_data_against_schema(data=payload, schema=inbound_schema)

    # The handler is called with the (possibly unwrapped) payload, not the raw event
    result = handler(payload, context)

    if outbound_schema:
        logger.debug("Validating outbound event")
        validate_data_against_schema(data=result, schema=outbound_schema)

    return result


def validate(event: Dict, schema: Dict = None, envelope: str = None, jmespath_options: Dict = None):
    """Validate event data against a JSON Schema as a standalone call

    Typically used when you need more control over the validation process.
    When an envelope is given, the event is unwrapped first and the extracted
    data is what gets validated.

    Example
    -------

    **Validate event**

        from aws_lambda_powertools.utilities.validation import validate

        def handler(event, context):
            validate(event=event, schema=json_schema_dict)
            return event

    **Unwrap event before validating against actual payload - using built-in envelopes**

        from aws_lambda_powertools.utilities.validation import validate, envelopes

        def handler(event, context):
            validate(event=event, schema=json_schema_dict, envelope=envelopes.API_GATEWAY_REST)
            return event

    **Unwrap event before validating against actual payload - using custom JMESPath expression**

        from aws_lambda_powertools.utilities.validation import validate

        def handler(event, context):
            validate(event=event, schema=json_schema_dict, envelope="payload[*].my_data")
            return event

    **Unwrap and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validate

        def handler(event, context):
            validate(event=event, schema=json_schema_dict, envelope="Records[*].powertools_json(body)")
            return event

    **Unwrap, decode base64 and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validate

        def handler(event, context):
            validate(
                event=event,
                schema=json_schema_dict,
                envelope="Records[*].kinesis.powertools_json(powertools_base64(data))",
            )
            return event

    **Unwrap, decompress gzip data and deserialize JSON string event before validating - using built-in functions**

        from aws_lambda_powertools.utilities.validation import validate

        def handler(event, context):
            validate(
                event=event,
                schema=json_schema_dict,
                envelope="awslogs.powertools_base64_gzip(data) | powertools_json(@).logEvents[*]",
            )
            return event

    Parameters
    ----------
    event : Dict
        Lambda event to be validated
    schema : Dict
        JSON Schema to validate incoming event
    envelope : str
        JMESPath expression used to extract the payload from the event
    jmespath_options : Dict
        Alternative JMESPath options to be included when filtering expr

    Raises
    ------
    SchemaValidationError
        When schema validation fails against data set
    InvalidSchemaFormatError
        When JSON schema provided is invalid
    InvalidEnvelopeExpressionError
        When JMESPath expression to unwrap event is invalid
    """
    payload = (
        unwrap_event_from_envelope(data=event, envelope=envelope, jmespath_options=jmespath_options)
        if envelope
        else event
    )

    validate_data_against_schema(data=payload, schema=schema)
Loading