1
1
import copy
2
2
3
- from airbyte_cdk .sources .streams .concurrent .cursor import Cursor
4
-
5
3
#
6
4
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
7
5
#
8
-
9
6
import logging
10
7
from collections import OrderedDict
11
- from typing import Any , Callable , Iterable , Mapping , MutableMapping , Optional , Union
8
+ from typing import Any , Callable , Iterable , Mapping , MutableMapping , Optional
12
9
10
+ from airbyte_cdk .sources .connector_state_manager import ConnectorStateManager
13
11
from airbyte_cdk .sources .declarative .incremental .declarative_cursor import DeclarativeCursor
14
12
from airbyte_cdk .sources .declarative .partition_routers .partition_router import PartitionRouter
13
+ from airbyte_cdk .sources .message import MessageRepository
15
14
from airbyte_cdk .sources .streams .checkpoint .per_partition_key_serializer import (
16
15
PerPartitionKeySerializer ,
17
16
)
18
- from airbyte_cdk .sources .streams .concurrent .cursor import Cursor , CursorField , CursorValueType , GapType
17
+ from airbyte_cdk .sources .streams .concurrent .cursor import Cursor , CursorField
19
18
from airbyte_cdk .sources .streams .concurrent .partitions .partition import Partition
20
19
from airbyte_cdk .sources .types import Record , StreamSlice , StreamState
21
- import functools
22
- from abc import ABC , abstractmethod
23
- from typing import Any , Callable , Iterable , List , Mapping , MutableMapping , Optional , Protocol , Tuple
24
-
25
- from airbyte_cdk .sources .connector_state_manager import ConnectorStateManager
26
- from airbyte_cdk .sources .message import MessageRepository
27
- from airbyte_cdk .sources .streams import NO_CURSOR_STATE_KEY
28
- from airbyte_cdk .sources .streams .concurrent .partitions .partition import Partition
29
- from airbyte_cdk .sources .streams .concurrent .partitions .record import Record
30
- from airbyte_cdk .sources .streams .concurrent .partitions .stream_slicer import StreamSlicer
31
- from airbyte_cdk .sources .streams .concurrent .state_converters .abstract_stream_state_converter import (
32
- AbstractStreamStateConverter ,
33
- )
34
- from airbyte_cdk .sources .types import StreamSlice
35
20
36
21
logger = logging .getLogger ("airbyte" )
37
22
38
23
39
24
class ConcurrentCursorFactory :
40
- def __init__ (self , create_function : Callable [[ Mapping [ str , Any ]], DeclarativeCursor ]):
25
+ def __init__ (self , create_function : Callable [..., tuple [ Cursor , ...] ]):
41
26
self ._create_function = create_function
42
27
43
- def create (self , stream_state : Mapping [str , Any ]) -> DeclarativeCursor :
28
+ def create (self , stream_state : Mapping [str , Any ]) -> Cursor :
44
29
return self ._create_function (stream_state = stream_state )[0 ]
45
30
46
31
@@ -78,52 +63,36 @@ def __init__(
78
63
stream_state : Any ,
79
64
message_repository : MessageRepository ,
80
65
connector_state_manager : ConnectorStateManager ,
81
- connector_state_converter : AbstractStreamStateConverter ,
82
66
cursor_field : CursorField ,
83
- slice_boundary_fields : Optional [Tuple [str , str ]],
84
- start : Optional [CursorValueType ],
85
- end_provider : Callable [[], CursorValueType ],
86
- lookback_window : Optional [GapType ] = None ,
87
- slice_range : Optional [GapType ] = None ,
88
- cursor_granularity : Optional [GapType ] = None ,
89
67
) -> None :
90
68
self ._stream_name = stream_name
91
69
self ._stream_namespace = stream_namespace
92
70
self ._message_repository = message_repository
93
- self ._connector_state_converter = connector_state_converter
94
71
self ._connector_state_manager = connector_state_manager
95
72
self ._cursor_field = cursor_field
96
- # To see some example where the slice boundaries might not be defined, check https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L363-L379
97
- self ._slice_boundary_fields = slice_boundary_fields
98
- self ._start = start
99
- self ._end_provider = end_provider
100
- self ._conccurent_state = stream_state
101
- # self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
102
- self ._lookback_window = lookback_window
103
- self ._slice_range = slice_range
104
- self ._most_recent_cursor_value_per_partition : MutableMapping [Partition , Any ] = {}
105
- self ._has_closed_at_least_one_slice = False
106
- self ._cursor_granularity = cursor_granularity
107
73
108
74
self ._cursor_factory = cursor_factory
109
75
self ._partition_router = partition_router
76
+
110
77
# The dict is ordered to ensure that once the maximum number of partitions is reached,
111
78
# the oldest partitions can be efficiently removed, maintaining the most recent partitions.
112
79
self ._cursor_per_partition : OrderedDict [str , Cursor ] = OrderedDict ()
113
80
self ._over_limit = 0
114
- self ._state = {}
115
81
self ._partition_serializer = PerPartitionKeySerializer ()
116
82
117
83
self ._set_initial_state (stream_state )
118
84
85
+ @property
86
+ def cursor_field (self ) -> CursorField :
87
+ return self ._cursor_field
88
+
119
89
@property
120
90
def state (self ) -> MutableMapping [str , Any ]:
121
91
states = []
122
92
for partition_tuple , cursor in self ._cursor_per_partition .items ():
123
93
cursor_state = cursor ._connector_state_converter .convert_to_state_message (
124
94
cursor ._cursor_field , cursor .state
125
95
)
126
- # print(cursor_state, cursor.state)
127
96
if cursor_state :
128
97
states .append (
129
98
{
@@ -132,13 +101,8 @@ def state(self) -> MutableMapping[str, Any]:
132
101
}
133
102
)
134
103
state : dict [str , Any ] = {"states" : states }
135
- # print(state)
136
104
return state
137
105
138
- @property
139
- def cursor_field (self ) -> CursorField :
140
- return self ._cursor_field
141
-
142
106
def close_partition (self , partition : Partition ) -> None :
143
107
self ._cursor_per_partition [self ._to_partition_key (partition ._stream_slice .partition )].close_partition_without_emit (partition = partition )
144
108
@@ -253,8 +217,7 @@ def _set_initial_state(self, stream_state: StreamState) -> None:
253
217
self ._partition_router .set_initial_state (stream_state )
254
218
255
219
def observe (self , record : Record ) -> None :
256
- print (f"ESTATE: { self ._to_partition_key (record .partition ._stream_slice .partition )} : { record .data [self .cursor_field .cursor_field_key ]} " )
257
- self ._cursor_per_partition [self ._to_partition_key (record .partition ._stream_slice .partition )].observe (record )
220
+ self ._cursor_per_partition [self ._to_partition_key (record .associated_slice .partition )].observe (record )
258
221
259
222
def _to_partition_key (self , partition : Mapping [str , Any ]) -> str :
260
223
return self ._partition_serializer .to_partition_key (partition )
@@ -267,9 +230,7 @@ def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:
267
230
return cursor
268
231
269
232
def should_be_synced (self , record : Record ) -> bool :
270
- return self ._get_cursor (record ).should_be_synced (
271
- self ._convert_record_to_cursor_record (record )
272
- )
233
+ return self ._get_cursor (record ).should_be_synced (record )
273
234
274
235
def is_greater_than_or_equal (self , first : Record , second : Record ) -> bool :
275
236
if not first .associated_slice or not second .associated_slice :
@@ -295,7 +256,7 @@ def _convert_record_to_cursor_record(record: Record) -> Record:
295
256
else None ,
296
257
)
297
258
298
- def _get_cursor (self , record : Record ) -> DeclarativeCursor :
259
+ def _get_cursor (self , record : Record ) -> Cursor :
299
260
if not record .associated_slice :
300
261
raise ValueError (
301
262
"Invalid state as stream slices that are emitted should refer to an existing cursor"
0 commit comments