1
0
mirror of synced 2026-01-02 21:02:43 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py
Brian Lai de24ddced5 make sure stream_slices is an iterator (#18092)
* ensure output from stream_slices() is always an iterator

* bump cdk version

* bump version after merging latest cdk

* bump cdk version post rebase
2022-10-19 15:37:00 -04:00

62 lines
2.4 KiB
Python

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import logging
from dataclasses import InitVar, dataclass
from typing import Any, List, Mapping, Tuple
from airbyte_cdk.models.airbyte_protocol import SyncMode
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
from airbyte_cdk.sources.source import Source
from dataclasses_jsonschema import JsonSchemaMixin
@dataclass
class CheckStream(ConnectionChecker, JsonSchemaMixin):
    """
    Checks the connection by trying to read a record from one or many of the streams selected by the developer

    Attributes:
        stream_names (List[str]): names of the streams to read records from
        options (Mapping[str, Any]): init-only options; kept on the instance as `_options`
    """

    stream_names: List[str]
    options: InitVar[Mapping[str, Any]]

    def __post_init__(self, options: Mapping[str, Any]):
        # `options` is an InitVar, so it is not a dataclass field; keep a private reference to it.
        self._options = options

    def check_connection(self, source: Source, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        """
        Try to read one record from each stream listed in `stream_names`.

        :param source: source whose streams are checked
        :param logger: logger supplied by the caller (not used by this check)
        :param config: configuration passed to `source.streams()`
        :return: `(True, None)` if every listed stream could be read, otherwise `(False, <error message>)`
        :raises ValueError: if a name in `stream_names` does not match any stream exposed by the source
        """
        streams = source.streams(config)
        if not streams:
            return False, f"No streams to connect to from source {source}"

        stream_name_to_stream = {s.name: s for s in streams}
        for stream_name in self.stream_names:
            if stream_name not in stream_name_to_stream:
                raise ValueError(f"{stream_name} is not part of the catalog. Expected one of {stream_name_to_stream.keys()}")

            stream = stream_name_to_stream[stream_name]
            try:
                # Some streams need a stream slice to read records (eg if they have a SubstreamSlicer)
                stream_slice = self._get_stream_slice(stream)
                # Wrap in iter() for the same reason as in _get_stream_slice: read_records() implementations
                # may return an iterable (e.g. a list) that is not an iterator.
                records = iter(stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice))
                next(records)
            except StopIteration:
                # The read succeeded but produced no records. An empty stream is still a reachable stream,
                # so this must not be reported as a connection failure.
                continue
            except Exception as error:
                return False, f"Unable to connect to stream {stream_name} - {error}"
        return True, None

    def _get_stream_slice(self, stream):
        """
        Return the first full-refresh slice of `stream`, or an empty mapping if it has no slices.

        :param stream: stream to get a slice from
        :return: the first element produced by `stream.stream_slices()`, or `{}` when there is none
        """
        # We wrap the return output of stream_slices() because some implementations return types that are iterable,
        # but not iterators such as lists or tuples
        slices = iter(
            stream.stream_slices(
                cursor_field=stream.cursor_field,
                sync_mode=SyncMode.full_refresh,
            )
        )
        return next(slices, {})