|
1 | 1 | #
|
2 | 2 | # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3 | 3 | #
|
| 4 | +from copy import deepcopy |
4 | 5 |
|
5 | 6 | # mypy: ignore-errors
|
6 | 7 | from datetime import datetime, timedelta, timezone
|
7 |
| -from typing import Any, Iterable, Mapping |
| 8 | +from pathlib import Path |
| 9 | +from typing import Any, Iterable, Mapping, Optional, Union |
8 | 10 |
|
9 | 11 | import freezegun
|
10 | 12 | import pytest
|
11 | 13 | import requests
|
| 14 | +from freezegun.api import FakeDatetime |
12 | 15 | from pydantic.v1 import ValidationError
|
13 | 16 |
|
14 | 17 | from airbyte_cdk import AirbyteTracedException
|
|
42 | 45 | ClientSideIncrementalRecordFilterDecorator,
|
43 | 46 | )
|
44 | 47 | from airbyte_cdk.sources.declarative.incremental import (
|
| 48 | + ConcurrentPerPartitionCursor, |
45 | 49 | CursorFactory,
|
46 | 50 | DatetimeBasedCursor,
|
47 | 51 | PerPartitionCursor,
|
|
166 | 170 | MonthClampingStrategy,
|
167 | 171 | WeekClampingStrategy,
|
168 | 172 | )
|
169 |
| -from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor |
| 173 | +from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField |
170 | 174 | from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
171 | 175 | CustomFormatConcurrentStreamStateConverter,
|
172 | 176 | )
|
|
190 | 194 | input_config = {"apikey": "verysecrettoken", "repos": ["airbyte", "airbyte-cloud"]}
|
191 | 195 |
|
192 | 196 |
|
def get_factory_with_parameters(
    connector_state_manager: Optional[ConnectorStateManager] = None,
) -> ModelToComponentFactory:
    """Build a ``ModelToComponentFactory`` bound to the given state manager.

    Args:
        connector_state_manager: State manager forwarded unchanged to the
            factory constructor; ``None`` is forwarded as-is.

    Returns:
        A newly constructed ``ModelToComponentFactory``.
    """
    factory = ModelToComponentFactory(connector_state_manager=connector_state_manager)
    return factory
| 203 | + |
| 204 | + |
def read_yaml_file(resource_path: Union[str, Path]) -> str:
    """Return the text of a resource file resolved relative to this module.

    Args:
        resource_path: Path of the file, joined onto the directory that
            contains this module. An absolute path is used as-is, because
            joining a ``Path`` with an absolute path yields the absolute path.

    Returns:
        The full file content as a string.

    Raises:
        OSError: If the file cannot be opened or read (e.g. it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    yaml_path = Path(__file__).parent / resource_path
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    return yaml_path.read_text(encoding="utf-8")
| 210 | + |
| 211 | + |
193 | 212 | def test_create_check_stream():
|
194 | 213 | manifest = {"check": {"type": "CheckStream", "stream_names": ["list_stream"]}}
|
195 | 214 |
|
@@ -925,6 +944,97 @@ def test_stream_with_incremental_and_retriever_with_partition_router():
|
925 | 944 | assert list_stream_slicer._cursor_field.string == "a_key"
|
926 | 945 |
|
927 | 946 |
|
@freezegun.freeze_time("2025-05-14")
def test_stream_with_incremental_and_async_retriever_with_partition_router():
    """Check per-partition state handling for an async-retriever stream.

    Builds a ``DeclarativeStream`` from a YAML manifest together with an
    incoming per-partition stream state, then asserts that:

    * the retriever is an ``AsyncRetriever`` whose inner stream slicer is a
      ``ConcurrentPerPartitionCursor`` reporting the incoming state;
    * the cursor stored for the partition starts at the partition's saved
      cursor value and exposes that value in its state;
    * the slices produced by that cursor run from the saved cursor value to
      the frozen "now" (2025-05-14).
    """
    # Kept function-local so this test does not depend on a module-level
    # import that may not exist; placed first rather than between assertions.
    import json

    # NOTE(review): "aync" is the spelling used by the path this test loads;
    # presumably the resource file on disk is named the same way. Confirm
    # before renaming either side.
    content = read_yaml_file(
        "resources/stream_with_incremental_and_aync_retriever_with_partition_router.yaml"
    )
    parsed_manifest = YamlDeclarativeSource._parse(content)
    resolved_manifest = resolver.preprocess_manifest(parsed_manifest)
    stream_manifest = transformer.propagate_types_and_parameters(
        "", resolved_manifest["list_stream"], {}
    )

    cursor_time_period_value = "2025-05-06T12:00:00+0000"
    cursor_field_key = "TimePeriod"
    parent_user_id = "102023653"
    per_partition_key = {
        "account_id": 999999999,
        "parent_slice": {"parent_slice": {}, "user_id": parent_user_id},
    }
    stream_state = {
        "use_global_cursor": False,
        "states": [
            {"partition": per_partition_key, "cursor": {cursor_field_key: cursor_time_period_value}}
        ],
        "state": {cursor_field_key: "2025-05-12T12:00:00+0000"},
        "lookback_window": 46,
    }
    connector_state_manager = ConnectorStateManager(
        state=[
            AirbyteStateMessage(
                type=AirbyteStateType.STREAM,
                stream=AirbyteStreamState(
                    stream_descriptor=StreamDescriptor(name="lists"),
                    stream_state=AirbyteStateBlob(stream_state),
                ),
            )
        ]
    )

    factory_with_parameters = get_factory_with_parameters(
        connector_state_manager=connector_state_manager
    )
    # Copy the shared module-level config so the added key does not leak into
    # other tests.
    connector_config = deepcopy(input_config)
    connector_config["reports_start_date"] = "2025-01-01"
    stream = factory_with_parameters.create_component(
        model_type=DeclarativeStreamModel,
        component_definition=stream_manifest,
        config=connector_config,
    )

    assert isinstance(stream, DeclarativeStream)
    assert isinstance(stream.retriever, AsyncRetriever)
    stream_slicer = stream.retriever.stream_slicer.stream_slicer
    assert isinstance(stream_slicer, ConcurrentPerPartitionCursor)
    assert stream_slicer.state == stream_state

    cursor_perpartition = stream_slicer._cursor_per_partition
    # Compact, key-sorted JSON. Using explicit separators (instead of
    # stripping every space from the default output) leaves any whitespace
    # inside string values intact.
    expected_cursor_perpartition_key = json.dumps(
        per_partition_key, sort_keys=True, separators=(",", ":")
    )
    # Fail with a readable assertion rather than a bare KeyError below.
    assert expected_cursor_perpartition_key in cursor_perpartition

    concurrent_cursor = cursor_perpartition[expected_cursor_perpartition_key]
    assert concurrent_cursor.cursor_field.cursor_field_key == cursor_field_key
    assert concurrent_cursor.start == datetime(2025, 5, 6, 12, 0, tzinfo=timezone.utc)
    assert concurrent_cursor.state[cursor_field_key] == cursor_time_period_value

    assert concurrent_cursor._concurrent_state == {
        "legacy": {cursor_field_key: cursor_time_period_value},
        "slices": [
            {
                "end": FakeDatetime(2025, 5, 6, 12, 0, tzinfo=timezone.utc),
                "most_recent_cursor_value": FakeDatetime(2025, 5, 6, 12, 0, tzinfo=timezone.utc),
                "start": FakeDatetime(2025, 1, 1, 0, 0, tzinfo=timezone.utc),
            }
        ],
        "state_type": "date-range",
    }

    stream_slices = list(concurrent_cursor.stream_slices())
    expected_stream_slices = [
        {"start_time": cursor_time_period_value, "end_time": "2025-05-14T00:00:00+0000"}
    ]
    assert stream_slices == expected_stream_slices
| 1036 | + |
| 1037 | + |
928 | 1038 | def test_resumable_full_refresh_stream():
|
929 | 1039 | content = """
|
930 | 1040 | decoder:
|
|
0 commit comments