3
3
#
4
4
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
5
5
#
6
+ import threading
6
7
import logging
7
8
from collections import OrderedDict
8
9
from typing import Any , Callable , Iterable , Mapping , MutableMapping , Optional
9
10
10
11
from airbyte_cdk .sources .connector_state_manager import ConnectorStateManager
12
+ from airbyte_cdk .sources .declarative .incremental .global_substream_cursor import iterate_with_last_flag_and_state , Timer
11
13
from airbyte_cdk .sources .declarative .incremental .declarative_cursor import DeclarativeCursor
12
14
from airbyte_cdk .sources .declarative .partition_routers .partition_router import PartitionRouter
13
15
from airbyte_cdk .sources .message import MessageRepository
@@ -77,6 +79,15 @@ def __init__(
77
79
# The dict is ordered to ensure that once the maximum number of partitions is reached,
78
80
# the oldest partitions can be efficiently removed, maintaining the most recent partitions.
79
81
self ._cursor_per_partition : OrderedDict [str , Cursor ] = OrderedDict ()
82
+ self ._state = {"states" : []}
83
+ self ._semaphore_per_partition = OrderedDict ()
84
+ self ._finished_partitions = set ()
85
+ self ._lock = threading .Lock ()
86
+ self ._timer = Timer ()
87
+ self ._global_cursor = None
88
+ self ._new_global_cursor = None
89
+ self ._lookback_window = 0
90
+ self ._parent_state = None
80
91
self ._over_limit = 0
81
92
self ._partition_serializer = PerPartitionKeySerializer ()
82
93
@@ -91,7 +102,7 @@ def state(self) -> MutableMapping[str, Any]:
91
102
states = []
92
103
for partition_tuple , cursor in self ._cursor_per_partition .items ():
93
104
cursor_state = cursor ._connector_state_converter .convert_to_state_message (
94
- cursor . _cursor_field , cursor .state
105
+ self . cursor_field , cursor .state
95
106
)
96
107
if cursor_state :
97
108
states .append (
@@ -101,16 +112,40 @@ def state(self) -> MutableMapping[str, Any]:
101
112
}
102
113
)
103
114
state : dict [str , Any ] = {"states" : states }
115
+
116
+ state ["state" ] = self ._global_cursor
117
+ if self ._lookback_window is not None :
118
+ state ["lookback_window" ] = self ._lookback_window
119
+ if self ._parent_state is not None :
120
+ state ["parent_state" ] = self ._parent_state
121
+ print (state )
104
122
return state
105
123
106
124
def close_partition(self, partition: Partition) -> None:
    """
    Close ``partition`` on its per-partition cursor and track its contribution to the global cursor.

    The partition's cursor is closed first. Then, while holding ``self._lock``, one permit is taken from the
    partition's semaphore (slice generation releases one permit per generated slice). Once the partition has been
    flagged as finished and its semaphore holds no more permits, the partition's state message becomes the new
    candidate global cursor if its cursor value is greater than the current candidate's.

    :param partition: the processed partition; ``partition._stream_slice.partition`` identifies the per-partition
        cursor and semaphore to use.
    :raises KeyError: if no cursor or semaphore is registered for the partition.
    """
    # Imported locally so this method does not depend on a module-level `import copy`.
    import copy

    # The key is needed several times: compute it once.
    partition_key = self._to_partition_key(partition._stream_slice.partition)
    cursor = self._cursor_per_partition[partition_key]
    cursor.close_partition(partition=partition)

    with self._lock:
        semaphore = self._semaphore_per_partition[partition_key]
        # NOTE(review): this blocks while holding the lock if no permit is available. Presumably a permit always
        # exists because a slice is generated (release) before it is closed (acquire) - confirm.
        semaphore.acquire()

        # threading.Semaphore has no public accessor for its counter, hence the read of the private `_value`.
        all_slices_closed = partition_key in self._finished_partitions and semaphore._value == 0
        if not all_slices_closed:
            return

        cursor_state = cursor._connector_state_converter.convert_to_state_message(cursor._cursor_field, cursor.state)
        cursor_key = self.cursor_field.cursor_field_key
        if self._new_global_cursor is None or self._new_global_cursor[cursor_key] < cursor_state[cursor_key]:
            # Deep copy so later mutations of the partition state cannot leak into the global cursor.
            self._new_global_cursor = copy.deepcopy(cursor_state)
108
139
109
140
def ensure_at_least_one_state_emitted(self) -> None:
    """
    The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this
    method to be called.

    Before emitting, the candidate global cursor, the lookback window (taken from the timer) and the parent state
    (taken from the partition router) are promoted to the reported state, but only when no partition semaphore
    holds a remaining permit. The state message is emitted in every case.
    """
    # threading.Semaphore has no public accessor for its counter, hence the read of the private `_value`.
    has_pending_slices = any(semaphore._value for semaphore in self._semaphore_per_partition.values())
    if not has_pending_slices:
        self._global_cursor = self._new_global_cursor
        self._lookback_window = self._timer.finish()
        self._parent_state = self._partition_router.get_stream_state()
    self._emit_state_message()
115
150
116
151
def _emit_state_message (self ) -> None :
@@ -127,6 +162,7 @@ def _emit_state_message(self) -> None:
127
162
128
163
def stream_slices(self) -> Iterable[StreamSlice]:
    """
    Yield the slices of every partition produced by the partition router.

    The timer is started once the router's slice iterable has been obtained; each partition is then expanded
    through ``generate_slices_from_partition``.
    """
    router_partitions = self._partition_router.stream_slices()
    self._timer.start()
    for router_partition in router_partitions:
        yield from self.generate_slices_from_partition(router_partition)
132
168
@@ -143,8 +179,15 @@ def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[Str
143
179
)
144
180
cursor = self ._create_cursor (partition_state )
145
181
self ._cursor_per_partition [self ._to_partition_key (partition .partition )] = cursor
146
-
147
- for cursor_slice in cursor .stream_slices ():
182
+ self ._semaphore_per_partition [self ._to_partition_key (partition .partition )] = threading .Semaphore (0 )
183
+
184
+ for cursor_slice , is_last_slice , _ in iterate_with_last_flag_and_state (
185
+ cursor .stream_slices (),
186
+ lambda : None ,
187
+ ):
188
+ self ._semaphore_per_partition [self ._to_partition_key (partition .partition )].release ()
189
+ if is_last_slice :
190
+ self ._finished_partitions .add (self ._to_partition_key (partition .partition ))
148
191
yield StreamSlice (
149
192
partition = partition , cursor_slice = cursor_slice , extra_fields = partition .extra_fields
150
193
)
@@ -208,6 +251,7 @@ def _set_initial_state(self, stream_state: StreamState) -> None:
208
251
self ._cursor_per_partition [self ._to_partition_key (state ["partition" ])] = (
209
252
self ._create_cursor (state ["cursor" ])
210
253
)
254
+ self ._semaphore_per_partition [self ._to_partition_key (state ["partition" ])] = threading .Semaphore (0 )
211
255
212
256
# set default state for missing partitions if it is per partition with fallback to global
213
257
if "state" in stream_state :
@@ -217,6 +261,7 @@ def _set_initial_state(self, stream_state: StreamState) -> None:
217
261
self ._partition_router .set_initial_state (stream_state )
218
262
219
263
def observe(self, record: Record) -> None:
    """
    Forward ``record`` to the cursor of the partition it belongs to.

    :param record: record whose ``associated_slice.partition`` identifies the per-partition cursor.
    :raises KeyError: if no cursor is registered for the record's partition.
    """
    # No debug printing here: this runs for every record, and stdout output would mix with the connector's messages.
    partition_key = self._to_partition_key(record.associated_slice.partition)
    self._cursor_per_partition[partition_key].observe(record)
221
266
222
267
def _to_partition_key (self , partition : Mapping [str , Any ]) -> str :
0 commit comments