
Commit 271f560

Authored Mar 4, 2021
refactor(metrics): optimize validation and serialization (#307)
* feat: allow metric validation to be disabled
* refactor: use native python validation over JSON Schema
* test: adjust perf bar to flaky/CI machines
* chore: flaky CI
* chore: update lock to test flaky CI build
* chore: ignore 3.6 to test flaky CI
* chore: re-add 3.6 to test flaky CI
1 parent b4d0baa commit 271f560
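
For context, the code paths touched here sit behind the public Metrics API: metrics and dimensions collected during a handler invocation are serialized into a CloudWatch EMF blob and printed to stdout. A minimal usage sketch, assuming the library's Metrics/log_metrics API and purely illustrative handler, namespace, and service names:

from aws_lambda_powertools import Metrics
from aws_lambda_powertools.metrics import MetricUnit

metrics = Metrics(namespace="ExampleApp", service="booking")  # illustrative values


@metrics.log_metrics  # serializes the metric set and prints the EMF JSON after the handler returns
def lambda_handler(event, context):
    metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1)
    metrics.add_dimension(name="operation", value="confirm_booking")
    return {"statusCode": 200}

It is this serialize-and-print step that the diff below moves from JSON Schema validation to plain Python checks and to compact json.dumps output.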

File tree

8 files changed: +373 -353 lines changed

aws_lambda_powertools/metrics/base.py

+12 -11

@@ -9,14 +9,12 @@
 
 from ..shared import constants
 from ..shared.functions import resolve_env_var_choice
-from ..shared.lazy_import import LazyLoader
 from .exceptions import MetricUnitError, MetricValueError, SchemaValidationError
-from .schema import CLOUDWATCH_EMF_SCHEMA
 
-fastjsonschema = LazyLoader("fastjsonschema", globals(), "fastjsonschema")
 logger = logging.getLogger(__name__)
 
 MAX_METRICS = 100
+MAX_DIMENSIONS = 9
 
 
 class MetricUnit(Enum):
@@ -180,6 +178,12 @@ def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None, me
         if self.service and not self.dimension_set.get("service"):
             self.dimension_set["service"] = self.service
 
+        if len(metrics) == 0:
+            raise SchemaValidationError("Must contain at least one metric.")
+
+        if self.namespace is None:
+            raise SchemaValidationError("Must contain a metric namespace.")
+
         logger.debug({"details": "Serializing metrics", "metrics": metrics, "dimensions": dimensions})
 
         metric_names_and_units: List[Dict[str, str]] = []  # [ { "Name": "metric_name", "Unit": "Count" } ]
@@ -209,12 +213,6 @@ def serialize_metric_set(self, metrics: Dict = None, dimensions: Dict = None, me
             **metric_names_and_values,  # "single_metric": 1.0
         }
 
-        try:
-            logger.debug("Validating serialized metrics against CloudWatch EMF schema")
-            fastjsonschema.validate(definition=CLOUDWATCH_EMF_SCHEMA, data=embedded_metrics_object)
-        except fastjsonschema.JsonSchemaException as e:
-            message = f"Invalid format. Error: {e.message}, Invalid item: {e.name}"  # noqa: B306, E501
-            raise SchemaValidationError(message)
         return embedded_metrics_object
 
     def add_dimension(self, name: str, value: str):
@@ -234,7 +232,10 @@ def add_dimension(self, name: str, value: str):
             Dimension value
         """
         logger.debug(f"Adding dimension: {name}:{value}")
-
+        if len(self.dimension_set) == 9:
+            raise SchemaValidationError(
+                f"Maximum number of dimensions exceeded ({MAX_DIMENSIONS}): Unable to add dimension {name}."
+            )
         # Cast value to str according to EMF spec
         # Majority of values are expected to be string already, so
         # checking before casting improves performance in most cases
@@ -295,7 +296,7 @@ def __extract_metric_unit_value(self, unit: Union[str, MetricUnit]) -> str:
         if unit in self._metric_unit_options:
             unit = MetricUnit[unit].value
 
-        if unit not in self._metric_units:  # str correta
+        if unit not in self._metric_units:
             raise MetricUnitError(
                 f"Invalid metric unit '{unit}', expected either option: {self._metric_unit_options}"
             )
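
The removed fastjsonschema call is replaced by the explicit checks above (empty metric set, missing namespace, dimension limit). The functional tests further down exercise them; as a quick sketch of how they surface to callers, assuming the public Metrics API and illustrative names:

import pytest

from aws_lambda_powertools import Metrics
from aws_lambda_powertools.metrics import SchemaValidationError

my_metrics = Metrics(namespace="ExampleApp")  # illustrative namespace

# Serializing an empty metric set now fails with a plain-Python check
with pytest.raises(SchemaValidationError, match="Must contain at least one metric."):
    my_metrics.serialize_metric_set()

# The dimension limit is enforced eagerly in add_dimension rather than at schema-validation time
for i in range(9):
    my_metrics.add_dimension(name=f"dim_{i}", value="value")
with pytest.raises(SchemaValidationError, match="Maximum number of dimensions exceeded"):
    my_metrics.add_dimension(name="one_too_many", value="value")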

aws_lambda_powertools/metrics/metric.py

+7 -3

@@ -102,8 +102,12 @@ def single_metric(name: str, unit: MetricUnit, value: float, namespace: str = No
 
     Raises
     ------
-    e
-        Propagate error received
+    MetricUnitError
+        When metric unit isn't supported by CloudWatch
+    MetricValueError
+        When metric value isn't a number
+    SchemaValidationError
+        When metric object fails EMF schema validation
     """
     metric_set = None
     try:
@@ -112,4 +116,4 @@ def single_metric(name: str, unit: MetricUnit, value: float, namespace: str = No
         yield metric
         metric_set: Dict = metric.serialize_metric_set()
     finally:
-        print(json.dumps(metric_set))
+        print(json.dumps(metric_set, separators=(",", ":")))
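
single_metric is a context manager that serializes and prints exactly one EMF blob when the block exits, so the updated Raises section and the compact separators both apply to it. A small usage sketch with illustrative metric and dimension names:

from aws_lambda_powertools.metrics import MetricUnit, single_metric

# On exit, one compact EMF JSON object is printed to stdout (or SchemaValidationError is raised)
with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="ExampleApp") as metric:
    metric.add_dimension(name="function_name", value="example-function")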

aws_lambda_powertools/metrics/metrics.py

+11 -5

@@ -57,13 +57,19 @@ def do_something():
 
     Parameters
    ----------
-    MetricManager : MetricManager
-        Inherits from `aws_lambda_powertools.metrics.base.MetricManager`
+    service : str, optional
+        service name to be used as metric dimension, by default "service_undefined"
+    namespace : str
+        Namespace for metrics
 
     Raises
     ------
-    e
-        Propagate error received
+    MetricUnitError
+        When metric unit isn't supported by CloudWatch
+    MetricValueError
+        When metric value isn't a number
+    SchemaValidationError
+        When metric object fails EMF schema validation
     """
 
     _metrics = {}
@@ -150,7 +156,7 @@ def decorate(event, context):
             else:
                 metrics = self.serialize_metric_set()
                 self.clear_metrics()
-                print(json.dumps(metrics))
+                print(json.dumps(metrics, separators=(",", ":")))
 
             return response
 
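
The separators=(",", ":") argument used in both flush paths is standard json.dumps behaviour: it drops the default space after item and key separators, so the EMF blob written to CloudWatch Logs is slightly smaller with no change in meaning. A quick illustration (plain Python, not part of the commit):

import json

blob = {"single_metric": 1.0, "service": "booking"}

print(json.dumps(blob))                         # {"single_metric": 1.0, "service": "booking"}
print(json.dumps(blob, separators=(",", ":")))  # {"single_metric":1.0,"service":"booking"}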

aws_lambda_powertools/metrics/schema.py

-94

This file was deleted.

poetry.lock

+233 -235

Some generated files are not rendered by default.

tests/functional/test_metrics.py

+5 -5

@@ -77,7 +77,7 @@ def metadata() -> Dict[str, str]:
 
 
 @pytest.fixture
-def a_hundred_metrics(namespace=namespace) -> List[Dict[str, str]]:
+def a_hundred_metrics() -> List[Dict[str, str]]:
     return [{"name": f"metric_{i}", "unit": "Count", "value": 1} for i in range(100)]
 
 
@@ -257,7 +257,7 @@ def test_schema_validation_no_namespace(metric, dimension):
     # GIVEN we don't add any namespace
     # WHEN we attempt to serialize a valid EMF object
     # THEN it should fail namespace validation
-    with pytest.raises(SchemaValidationError, match=".*Namespace must be string"):
+    with pytest.raises(SchemaValidationError, match="Must contain a metric namespace."):
         with single_metric(**metric) as my_metric:
             my_metric.add_dimension(**dimension)
 
@@ -278,7 +278,7 @@ def test_schema_no_metrics(service, namespace):
     my_metrics = Metrics(service=service, namespace=namespace)
 
     # THEN it should fail validation and raise SchemaValidationError
-    with pytest.raises(SchemaValidationError, match=".*Metrics must contain at least 1 items"):
+    with pytest.raises(SchemaValidationError, match="Must contain at least one metric."):
         my_metrics.serialize_metric_set()
 
 
@@ -288,7 +288,7 @@ def test_exceed_number_of_dimensions(metric, namespace):
 
     # WHEN we attempt to serialize them into a valid EMF object
     # THEN it should fail validation and raise SchemaValidationError
-    with pytest.raises(SchemaValidationError, match="must contain less than or equal to 9 items"):
+    with pytest.raises(SchemaValidationError, match="Maximum number of dimensions exceeded.*"):
         with single_metric(**metric, namespace=namespace) as my_metric:
             for dimension in dimensions:
                 my_metric.add_dimension(**dimension)
@@ -328,7 +328,7 @@ def lambda_handler(evt, context):
 
     # THEN the raised exception should be SchemaValidationError
     # and specifically about the lack of Metrics
-    with pytest.raises(SchemaValidationError, match=".*Metrics must contain at least 1 items"):
+    with pytest.raises(SchemaValidationError, match="Must contain at least one metric."):
         lambda_handler({}, {})
 
 

tests/performance/conftest.py

+18

@@ -0,0 +1,18 @@
+import time
+from contextlib import contextmanager
+from typing import Generator
+
+
+@contextmanager
+def timing() -> Generator:
+    """Generator to quickly time operations. It can add 5ms, so take that into account in elapsed time
+
+    Examples
+    --------
+
+        with timing() as t:
+            print("something")
+            elapsed = t()
+    """
+    start = time.perf_counter()
+    yield lambda: time.perf_counter() - start  # gen as lambda to calculate elapsed time

tests/performance/test_metrics.py

+87

@@ -0,0 +1,87 @@
+import json
+import time
+from contextlib import contextmanager
+from typing import Dict, Generator
+
+import pytest
+
+from aws_lambda_powertools import Metrics
+from aws_lambda_powertools.metrics import MetricUnit
+from aws_lambda_powertools.metrics import metrics as metrics_global
+
+# adjusted for slower machines in CI too
+METRICS_VALIDATION_SLA: float = 0.0013
+METRICS_SERIALIZATION_SLA: float = 0.0013
+
+
+@contextmanager
+def timing() -> Generator:
+    """Generator to quickly time operations. It can add 5ms, so take that into account in elapsed time
+
+    Examples
+    --------
+
+        with timing() as t:
+            print("something")
+            elapsed = t()
+    """
+    start = time.perf_counter()
+    yield lambda: time.perf_counter() - start  # gen as lambda to calculate elapsed time
+
+
+@pytest.fixture(scope="function", autouse=True)
+def reset_metric_set():
+    metrics = Metrics()
+    metrics.clear_metrics()
+    metrics_global.is_cold_start = True  # ensure each test has cold start
+    yield
+
+
+@pytest.fixture
+def namespace() -> str:
+    return "test_namespace"
+
+
+@pytest.fixture
+def metric() -> Dict[str, str]:
+    return {"name": "single_metric", "unit": MetricUnit.Count, "value": 1}
+
+
+def add_max_metrics_before_serialization(metrics_instance: Metrics):
+    metrics_instance.add_dimension(name="test_dimension", value="test")
+
+    for i in range(99):
+        metrics_instance.add_metric(name=f"metric_{i}", unit="Count", value=1)
+
+
+@pytest.mark.perf
+def test_metrics_large_operation_without_json_serialization_sla(namespace):
+    # GIVEN Metrics is initialized
+    my_metrics = Metrics(namespace=namespace)
+
+    # WHEN we add and serialize 99 metrics
+    with timing() as t:
+        add_max_metrics_before_serialization(metrics_instance=my_metrics)
+        my_metrics.serialize_metric_set()
+
+    # THEN completion time should be below our validation SLA
+    elapsed = t()
+    if elapsed > METRICS_VALIDATION_SLA:
+        pytest.fail(f"Metric validation should be below {METRICS_VALIDATION_SLA}s: {elapsed}")
+
+
+@pytest.mark.perf
+def test_metrics_large_operation_and_json_serialization_sla(namespace):
+    # GIVEN Metrics is initialized
+    my_metrics = Metrics(namespace=namespace)
+
+    # WHEN we add and serialize 99 metrics
+    with timing() as t:
+        add_max_metrics_before_serialization(metrics_instance=my_metrics)
+        metrics = my_metrics.serialize_metric_set()
+        print(json.dumps(metrics, separators=(",", ":")))
+
+    # THEN completion time should be below our serialization SLA
+    elapsed = t()
+    if elapsed > METRICS_SERIALIZATION_SLA:
+        pytest.fail(f"Metric serialization should be below {METRICS_SERIALIZATION_SLA}s: {elapsed}")
