1
1
#
2
2
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
#
4
+ from datetime import datetime , timedelta , timezone
4
5
from typing import List , Mapping , Optional
6
+ from unittest .mock import Mock
5
7
6
8
import pytest
7
9
11
13
RecordFilter ,
12
14
)
13
15
from airbyte_cdk .sources .declarative .incremental import (
14
- CursorFactory ,
15
- DatetimeBasedCursor ,
16
- GlobalSubstreamCursor ,
17
- PerPartitionWithGlobalCursor ,
16
+ ConcurrentPerPartitionCursor ,
17
+ ConcurrentCursorFactory ,
18
18
)
19
+ from airbyte_cdk .sources .streams .concurrent .cursor import ConcurrentCursor , CursorField
19
20
from airbyte_cdk .sources .declarative .interpolation import InterpolatedString
20
21
from airbyte_cdk .sources .declarative .models import (
21
22
CustomRetriever ,
24
25
)
25
26
from airbyte_cdk .sources .declarative .partition_routers import SubstreamPartitionRouter
26
27
from airbyte_cdk .sources .declarative .types import StreamSlice
28
+ from airbyte_cdk .sources .streams .concurrent .state_converters .datetime_stream_state_converter import (
29
+ CustomFormatConcurrentStreamStateConverter ,
30
+ )
27
31
from airbyte_cdk .sources .types import Record
32
+ from airbyte_cdk .utils .datetime_helpers import ab_datetime_parse , ab_datetime_now
28
33
29
34
DATE_FORMAT = "%Y-%m-%d"
30
35
RECORDS_TO_FILTER_DATE_FORMAT = [
@@ -272,25 +277,27 @@ def test_client_side_record_filter_decorator_no_parent_stream(
272
277
records_to_filter : List [Mapping ],
273
278
expected_record_ids : List [int ],
274
279
):
275
- date_time_based_cursor = DatetimeBasedCursor (
276
- start_datetime = MinMaxDatetime (
277
- datetime = "2021-01-01" , datetime_format = DATE_FORMAT , parameters = {}
280
+ datetime_based_cursor = ConcurrentCursor (
281
+ stream_name = "any_stream" ,
282
+ stream_namespace = None ,
283
+ stream_state = stream_state ,
284
+ message_repository = Mock (),
285
+ connector_state_manager = Mock (),
286
+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
287
+ datetime_format = datetime_format
278
288
),
279
- end_datetime = MinMaxDatetime (datetime = end_datetime , parameters = {}) if end_datetime else None ,
280
- step = "P10Y" ,
281
- cursor_field = InterpolatedString .create ("created_at" , parameters = {}),
282
- datetime_format = datetime_format ,
283
- cursor_granularity = "P1D" ,
284
- config = {},
285
- parameters = {},
289
+ cursor_field = CursorField ("created_at" ),
290
+ slice_boundary_fields = ("start" , "end" ),
291
+ start = datetime (2021 , 1 , 1 , tzinfo = timezone .utc ),
292
+ end_provider = lambda : ab_datetime_parse (end_datetime ) if end_datetime else ab_datetime_now (),
293
+ slice_range = timedelta (days = 365 * 10 ),
286
294
)
287
- date_time_based_cursor .set_initial_state (stream_state )
288
295
289
296
record_filter_decorator = ClientSideIncrementalRecordFilterDecorator (
290
297
config = {},
291
298
condition = record_filter_expression ,
292
299
parameters = {},
293
- cursor = date_time_based_cursor ,
300
+ cursor = datetime_based_cursor ,
294
301
)
295
302
296
303
filtered_records = list (
@@ -341,7 +348,7 @@ def test_client_side_record_filter_decorator_no_parent_stream(
341
348
}
342
349
],
343
350
},
344
- "per_partition_with_global " ,
351
+ "global_substream " ,
345
352
[2 , 3 ],
346
353
),
347
354
# Use PerPartitionWithGlobalCursor with partition state missing, global cursor used
@@ -363,23 +370,26 @@ def test_client_side_record_filter_decorator_no_parent_stream(
363
370
def test_client_side_record_filter_decorator_with_cursor_types (
364
371
stream_state : Optional [Mapping ], cursor_type : str , expected_record_ids : List [int ]
365
372
):
366
- def date_time_based_cursor_factory () -> DatetimeBasedCursor :
367
- return DatetimeBasedCursor (
368
- start_datetime = MinMaxDatetime (
369
- datetime = "2021-01-01" , datetime_format = DATE_FORMAT , parameters = {}
370
- ),
371
- end_datetime = MinMaxDatetime (
372
- datetime = "2021-01-05" , datetime_format = DATE_FORMAT , parameters = {}
373
+ def date_time_based_cursor_factory (stream_state , runtime_lookback_window ) -> ConcurrentCursor :
374
+ return ConcurrentCursor (
375
+ stream_name = "any_stream" ,
376
+ stream_namespace = None ,
377
+ stream_state = stream_state ,
378
+ message_repository = Mock (),
379
+ connector_state_manager = Mock (),
380
+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
381
+ datetime_format = DATE_FORMAT
373
382
),
374
- step = "P10Y" ,
375
- cursor_field = InterpolatedString .create ("created_at" , parameters = {}),
376
- datetime_format = DATE_FORMAT ,
377
- cursor_granularity = "P1D" ,
378
- config = {},
379
- parameters = {},
383
+ cursor_field = CursorField ("created_at" ),
384
+ slice_boundary_fields = ("start" , "end" ),
385
+ start = datetime (2021 , 1 , 1 , tzinfo = timezone .utc ),
386
+ end_provider = lambda : datetime (2021 , 1 , 5 , tzinfo = timezone .utc ),
387
+ slice_range = timedelta (days = 365 * 10 ),
388
+ cursor_granularity = timedelta (days = 1 ),
389
+ lookback_window = runtime_lookback_window ,
380
390
)
381
391
382
- date_time_based_cursor = date_time_based_cursor_factory ()
392
+ date_time_based_cursor = date_time_based_cursor_factory (stream_state , timedelta ( 0 ) )
383
393
384
394
substream_cursor = None
385
395
partition_router = SubstreamPartitionRouter (
@@ -401,29 +411,26 @@ def date_time_based_cursor_factory() -> DatetimeBasedCursor:
401
411
if cursor_type == "datetime" :
402
412
# Use only the datetime-based ConcurrentCursor
403
413
pass # No additional cursor needed
404
- elif cursor_type == "global_substream" :
414
+ elif cursor_type in [ "global_substream" , "per_partition_with_global" ] :
405
415
# Create a ConcurrentPerPartitionCursor; use_global_cursor is enabled only for "global_substream"
406
- substream_cursor = GlobalSubstreamCursor (
407
- stream_cursor = date_time_based_cursor ,
416
+ substream_cursor = ConcurrentPerPartitionCursor (
417
+ cursor_factory = ConcurrentCursorFactory ( date_time_based_cursor_factory ) ,
408
418
partition_router = partition_router ,
409
- )
410
- if stream_state :
411
- substream_cursor .set_initial_state (stream_state )
412
- elif cursor_type == "per_partition_with_global" :
413
- # Create PerPartitionWithGlobalCursor instance
414
- substream_cursor = PerPartitionWithGlobalCursor (
415
- cursor_factory = CursorFactory (date_time_based_cursor_factory ),
416
- partition_router = partition_router ,
417
- stream_cursor = date_time_based_cursor ,
419
+ stream_name = "a_stream" ,
420
+ stream_namespace = None ,
421
+ stream_state = stream_state ,
422
+ message_repository = Mock (),
423
+ connector_state_manager = Mock (),
424
+ connector_state_converter = CustomFormatConcurrentStreamStateConverter (
425
+ datetime_format = DATE_FORMAT
426
+ ),
427
+ cursor_field = CursorField ("created_at" ),
428
+ use_global_cursor = cursor_type == "global_substream" ,
429
+ attempt_to_create_cursor_if_not_provided = True ,
418
430
)
419
431
else :
420
432
raise ValueError (f"Unsupported cursor type: { cursor_type } " )
421
433
422
- if substream_cursor and stream_state :
423
- substream_cursor .set_initial_state (stream_state )
424
- elif stream_state :
425
- date_time_based_cursor .set_initial_state (stream_state )
426
-
427
434
# Create the record_filter_decorator with appropriate cursor
428
435
record_filter_decorator = ClientSideIncrementalRecordFilterDecorator (
429
436
config = {},
0 commit comments