Skip to content

Commit b2dab7b

Browse files
committed
Merge branch 'main' into 50395-1-2
* main:
  fix(airbyte-cdk): unable to create custom retriever (airbytehq#198)
  feat(low-code): added keys replace transformation (airbytehq#183)
  feat: add `min` macros (airbytehq#203)
  fix(low-code cdk pagination): Fix the offset strategy so that it resets back to 0 when a stream is an incremental data feed (airbytehq#202)
2 parents 1065a20 + e78eaff commit b2dab7b

File tree

14 files changed

+370
-12
lines changed

14 files changed

+370
-12
lines changed

airbyte_cdk/sources/declarative/declarative_component_schema.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,7 @@ definitions:
12411241
- "$ref": "#/definitions/KeysToLower"
12421242
- "$ref": "#/definitions/KeysToSnakeCase"
12431243
- "$ref": "#/definitions/FlattenFields"
1244+
- "$ref": "#/definitions/KeysReplace"
12441245
state_migrations:
12451246
title: State Migrations
12461247
description: Array of state migrations to be applied on the input state
@@ -1785,6 +1786,7 @@ definitions:
17851786
- "$ref": "#/definitions/KeysToLower"
17861787
- "$ref": "#/definitions/KeysToSnakeCase"
17871788
- "$ref": "#/definitions/FlattenFields"
1789+
- "$ref": "#/definitions/KeysReplace"
17881790
schema_type_identifier:
17891791
"$ref": "#/definitions/SchemaTypeIdentifier"
17901792
$parameters:
@@ -1883,6 +1885,49 @@ definitions:
18831885
$parameters:
18841886
type: object
18851887
additionalProperties: true
1888+
KeysReplace:
  # Schema of the KeysReplace record transformation.
  # `type`, `old` and `new` are all required; `old` and `new` are strings that
  # declare config / record / stream_state / stream_slice as interpolation context.
  title: Keys Replace
  description: A transformation that replaces symbols in keys.
  type: object
  required:
    - type
    - old
    - new
  properties:
    type:
      type: string
      enum: [KeysReplace]
    old:
      type: string
      title: Old value
      description: Old value to replace.
      examples:
        - " "
        - "{{ record.id }}"
        - "{{ config['id'] }}"
        - "{{ stream_slice['id'] }}"
      interpolation_context:
        - config
        - record
        - stream_state
        - stream_slice
    new:
      type: string
      title: New value
      description: New value to set.
      examples:
        - "_"
        - "{{ record.id }}"
        - "{{ config['id'] }}"
        - "{{ stream_slice['id'] }}"
      interpolation_context:
        - config
        - record
        - stream_state
        - stream_slice
    $parameters:
      type: object
      additionalProperties: true
18861931
IterableDecoder:
18871932
title: Iterable Decoder
18881933
description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.

airbyte_cdk/sources/declarative/interpolation/macros.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,26 @@ def max(*args: typing.Any) -> typing.Any:
9494
return builtins.max(*args)
9595

9696

97+
def min(*args: typing.Any) -> typing.Any:
    """
    Returns smallest object of an iterable, or two or more arguments.

    Supported call shapes (positional arguments only):
        min(iterable) -> value
        min(arg1, arg2, *args) -> value

    Usage:
    `"{{ min(2,3) }}"`

    With a single iterable argument, return its smallest item.
    With two or more arguments, return the smallest argument.

    Note: this macro only accepts positional arguments, so the ``default`` and
    ``key`` keyword arguments of the built-in ``min`` cannot be passed through it.

    :param args: args to compare
    :return: smallest argument
    """
    # Delegate to the built-in explicitly: inside this module the bare name
    # ``min`` refers to this macro itself.
    return builtins.min(*args)
115+
116+
97117
def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
98118
"""
99119
Returns datetime of now() + num_days
@@ -147,6 +167,7 @@ def format_datetime(
147167
today_utc,
148168
timestamp,
149169
max,
170+
min,
150171
day_delta,
151172
duration,
152173
format_datetime,

airbyte_cdk/sources/declarative/models/declarative_component_schema.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,23 @@ class KeysToSnakeCase(BaseModel):
721721
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
722722

723723

724+
# Model of the `KeysReplace` declarative component: a transformation configured
# with an `old` string and the `new` string that should replace it.
class KeysReplace(BaseModel):
    # Discriminator; must be the literal component name.
    type: Literal["KeysReplace"]
    # Required. Value to look for (examples show it may be an interpolated template).
    old: str = Field(
        ...,
        description="Old value to replace.",
        examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
        title="Old value",
    )
    # Required. Value to put in place of `old`.
    new: str = Field(
        ...,
        description="New value to set.",
        examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
        title="New value",
    )
    # Optional free-form parameters, exposed in manifests under the `$parameters` alias.
    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
739+
740+
724741
class FlattenFields(BaseModel):
725742
type: Literal["FlattenFields"]
726743
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -1701,6 +1718,7 @@ class Config:
17011718
KeysToLower,
17021719
KeysToSnakeCase,
17031720
FlattenFields,
1721+
KeysReplace,
17041722
]
17051723
]
17061724
] = Field(
@@ -1875,6 +1893,7 @@ class DynamicSchemaLoader(BaseModel):
18751893
KeysToLower,
18761894
KeysToSnakeCase,
18771895
FlattenFields,
1896+
KeysReplace,
18781897
]
18791898
]
18801899
] = Field(

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,9 @@
254254
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
255255
JwtPayload as JwtPayloadModel,
256256
)
257+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
258+
KeysReplace as KeysReplaceModel,
259+
)
257260
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
258261
KeysToLower as KeysToLowerModel,
259262
)
@@ -417,6 +420,9 @@
417420
from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
418421
FlattenFields,
419422
)
423+
from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
424+
KeysReplaceTransformation,
425+
)
420426
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
421427
KeysToLowerTransformation,
422428
)
@@ -509,6 +515,7 @@ def _init_mappings(self) -> None:
509515
GzipParserModel: self.create_gzip_parser,
510516
KeysToLowerModel: self.create_keys_to_lower_transformation,
511517
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
518+
KeysReplaceModel: self.create_keys_replace_transformation,
512519
FlattenFieldsModel: self.create_flatten_fields,
513520
IterableDecoderModel: self.create_iterable_decoder,
514521
XmlDecoderModel: self.create_xml_decoder,
@@ -630,6 +637,13 @@ def create_keys_to_snake_transformation(
630637
) -> KeysToSnakeCaseTransformation:
631638
return KeysToSnakeCaseTransformation()
632639

640+
def create_keys_replace_transformation(
    self, model: KeysReplaceModel, config: Config, **kwargs: Any
) -> KeysReplaceTransformation:
    """
    Build a KeysReplaceTransformation from its declarative model.

    :param model: parsed KeysReplace model providing `old`, `new` and optional parameters
    :param config: connector configuration (accepted for factory-signature parity; not used here)
    :param kwargs: extra factory arguments (ignored)
    :return: the transformation configured with the model's `old` / `new` values
    """
    return KeysReplaceTransformation(
        old=model.old,
        new=model.new,
        # The model's parameters are optional; the transformation is always given a mapping.
        parameters=model.parameters or {},
    )
646+
633647
def create_flatten_fields(
634648
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
635649
) -> FlattenFields:
@@ -1560,7 +1574,12 @@ def create_exponential_backoff_strategy(
15601574
)
15611575

15621576
def create_http_requester(
1563-
self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str
1577+
self,
1578+
model: HttpRequesterModel,
1579+
config: Config,
1580+
decoder: Decoder = JsonDecoder(parameters={}),
1581+
*,
1582+
name: str,
15641583
) -> HttpRequester:
15651584
authenticator = (
15661585
self._create_component_from_model(
@@ -1976,9 +1995,9 @@ def create_record_selector(
19761995
config: Config,
19771996
*,
19781997
name: str,
1979-
transformations: List[RecordTransformation],
1980-
decoder: Optional[Decoder] = None,
1981-
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
1998+
transformations: List[RecordTransformation] | None = None,
1999+
decoder: Decoder | None = None,
2000+
client_side_incremental_sync: Dict[str, Any] | None = None,
19822001
**kwargs: Any,
19832002
) -> RecordSelector:
19842003
assert model.schema_normalization is not None # for mypy
@@ -2008,7 +2027,7 @@ def create_record_selector(
20082027
name=name,
20092028
config=config,
20102029
record_filter=record_filter,
2011-
transformations=transformations,
2030+
transformations=transformations or [],
20122031
schema_normalization=schema_normalization,
20132032
parameters=model.parameters or {},
20142033
)

airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,9 @@ def next_page_token(
8383
return self._offset
8484

8585
def reset(self, reset_value: Optional[Any] = 0) -> None:
86-
if not isinstance(reset_value, int):
86+
if reset_value is None:
87+
self._offset = 0
88+
elif not isinstance(reset_value, int):
8789
raise ValueError(
8890
f"Reset value {reset_value} for OffsetIncrement pagination strategy was not an integer"
8991
)

airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ def next_page_token(
5353
return self._delegate.next_page_token(response, last_page_size, last_record)
5454

5555
def reset(self, reset_value: Optional[Any] = None) -> None:
    """
    Reset the wrapped pagination strategy.

    :param reset_value: value to reset the delegate to. When it is None the
        delegate's ``reset()`` is called without arguments so that the delegate
        applies its own default.
    """
    # Compare against None rather than relying on truthiness: an explicit
    # falsy value such as 0 is still a value the caller asked for and must be
    # forwarded to the delegate.
    if reset_value is not None:
        self._delegate.reset(reset_value)
    else:
        self._delegate.reset()
5760

5861
def get_page_size(self) -> Optional[int]:
5962
return self._delegate.get_page_size()
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#
2+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from dataclasses import InitVar, dataclass
6+
from typing import Any, Dict, Mapping, Optional
7+
8+
from airbyte_cdk import InterpolatedString
9+
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
10+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
11+
12+
13+
@dataclass
14+
class KeysReplaceTransformation(RecordTransformation):
15+
"""
16+
Transformation that applies keys names replacement.
17+
18+
Example usage:
19+
- type: KeysReplace
20+
old: " "
21+
new: "_"
22+
Result:
23+
from: {"created time": ..., "customer id": ..., "user id": ...}
24+
to: {"created_time": ..., "customer_id": ..., "user_id": ...}
25+
"""
26+
27+
old: str
28+
new: str
29+
parameters: InitVar[Mapping[str, Any]]
30+
31+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
32+
self._old = InterpolatedString.create(self.old, parameters=parameters)
33+
self._new = InterpolatedString.create(self.new, parameters=parameters)
34+
35+
def transform(
36+
self,
37+
record: Dict[str, Any],
38+
config: Optional[Config] = None,
39+
stream_state: Optional[StreamState] = None,
40+
stream_slice: Optional[StreamSlice] = None,
41+
) -> None:
42+
if config is None:
43+
config = {}
44+
45+
kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice}
46+
old_key = str(self._old.eval(config, **kwargs))
47+
new_key = str(self._new.eval(config, **kwargs))
48+
49+
def _transform(data: Dict[str, Any]) -> Dict[str, Any]:
50+
result = {}
51+
for key, value in data.items():
52+
updated_key = key.replace(old_key, new_key)
53+
if isinstance(value, dict):
54+
result[updated_key] = _transform(value)
55+
else:
56+
result[updated_key] = value
57+
return result
58+
59+
transformed_record = _transform(record)
60+
record.clear()
61+
record.update(transformed_record)

unit_tests/sources/declarative/interpolation/test_jinja.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ def test_to_string(test_name, input_value, expected_output):
184184
id="test_timestamp_from_rfc3339",
185185
),
186186
pytest.param("{{ max(1,2) }}", 2, id="test_max"),
187+
pytest.param("{{ min(1,2) }}", 1, id="test_min"),
187188
],
188189
)
189190
def test_macros(s, expected_value):
@@ -291,6 +292,8 @@ def test_undeclared_variables(template_string, expected_error, expected_value):
291292
),
292293
pytest.param("{{ max(2, 3) }}", 3, id="test_max_with_arguments"),
293294
pytest.param("{{ max([2, 3]) }}", 3, id="test_max_with_list"),
295+
pytest.param("{{ min(2, 3) }}", 2, id="test_min_with_arguments"),
296+
pytest.param("{{ min([2, 3]) }}", 2, id="test_min_with_list"),
294297
pytest.param("{{ day_delta(1) }}", "2021-09-02T00:00:00.000000+0000", id="test_day_delta"),
295298
pytest.param(
296299
"{{ day_delta(-1) }}", "2021-08-31T00:00:00.000000+0000", id="test_day_delta_negative"

unit_tests/sources/declarative/interpolation/test_macros.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
("test_now_utc", "now_utc", True),
1616
("test_today_utc", "today_utc", True),
1717
("test_max", "max", True),
18+
("test_min", "min", True),
1819
("test_day_delta", "day_delta", True),
1920
("test_format_datetime", "format_datetime", True),
2021
("test_duration", "duration", True),

unit_tests/sources/declarative/parsers/test_model_to_component_factory.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2634,6 +2634,42 @@ def test_create_custom_schema_loader():
26342634
assert isinstance(component, MyCustomSchemaLoader)
26352635

26362636

2637+
class MyCustomRetriever(SimpleRetriever):
    """SimpleRetriever subclass with no overrides, referenced by `class_name` in the custom retriever test."""

    pass
2639+
2640+
2641+
def test_create_custom_retriever():
    """A DeclarativeStream whose retriever is a CustomRetriever is built with the referenced class."""
    stream_model = {
        "type": "DeclarativeStream",
        "retriever": {
            "type": "CustomRetriever",
            "class_name": "unit_tests.sources.declarative.parsers.test_model_to_component_factory.MyCustomRetriever",
            "record_selector": {
                "type": "RecordSelector",
                "extractor": {
                    "type": "DpathExtractor",
                    "field_path": [],
                },
                "$parameters": {"name": ""},
            },
            "requester": {
                "type": "HttpRequester",
                "name": "list",
                "url_base": "orange.com",
                "path": "/v1/api",
                "$parameters": {"name": ""},
            },
        },
    }

    stream = factory.create_component(
        model_type=DeclarativeStreamModel, component_definition=stream_model, config=input_config
    )

    # The stream itself is a regular DeclarativeStream; only its retriever is custom.
    assert isinstance(stream, DeclarativeStream)
    assert isinstance(stream.retriever, MyCustomRetriever)
2671+
2672+
26372673
@freezegun.freeze_time("2021-01-01 00:00:00")
26382674
@pytest.mark.parametrize(
26392675
"config, manifest, expected",

0 commit comments

Comments
 (0)