* [ISSUE #26343] update close_slice to use the greater record * Renaming parameter for close_slice * code review
65 lines
2.8 KiB
Python
65 lines
2.8 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
from abc import ABC, abstractmethod
|
|
from typing import Optional
|
|
|
|
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
|
|
from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState
|
|
|
|
|
|
class Cursor(ABC, StreamSlicer):
    """
    Cursors are components that allow for incremental syncs. They keep track of what data has been consumed and slice the requests
    based on that information.
    """

    @abstractmethod
    def set_initial_state(self, stream_state: StreamState) -> None:
        """
        Cursors are not initialized with their state. As state is needed in order to function properly, this method should be called
        before calling anything else.

        :param stream_state: The state of the stream as returned by get_stream_state
        """

    @abstractmethod
    def close_slice(self, stream_slice: StreamSlice, most_recent_record: Optional[Record]) -> None:
        """
        Update state based on the stream slice and the most recent record. Note that `stream_slice.cursor_slice` and
        `most_recent_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be
        leveraged to update the state.

        :param stream_slice: slice to close
        :param most_recent_record: the latest record we have received for the slice. This is important to consider because even if the
          cursor emits a slice, some APIs are not able to enforce the upper boundary. The outcome is that `most_recent_record` might
          have a higher cursor value than the slice upper boundary and if we want to reduce the duplication as much as possible, we
          need to consider the highest value between the internal cursor, the stream slice upper boundary and the record cursor
          value. The type allows None (presumably when no record was received for the slice; confirm against callers).
        """

    @abstractmethod
    def get_stream_state(self) -> StreamState:
        """
        Returns the current stream state. We would like to restrict its usage since it does expose internals of the state. As of
        2023-06-14, it is used for three things:
        * Interpolation of the requests
        * Transformation of records
        * Saving the state

        For the interpolation of the requests, we are probably stuck with exposing the stream state. For saving the state, we can
        probably expose a method that allows for emitting the state to the platform.
        """

    @abstractmethod
    def should_be_synced(self, record: Record) -> bool:
        """
        Evaluating if a record should be synced allows for filtering and stop condition on pagination.

        :param record: the record to evaluate
        """

    @abstractmethod
    def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
        """
        Evaluating which record is greater in terms of cursor. This is used to avoid having to capture all the records to close a
        slice.

        :param first: the record on the left-hand side of the comparison
        :param second: the record on the right-hand side of the comparison
        """
|