1
0
mirror of synced 2026-01-01 18:02:53 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/declarative/checks/check_stream.py
midavadim c44c3eae48 CDK: availability check - handle HttpErrors which happen during slice extraction (#26630)
* for availability check - handle HttpError that happens during slice extraction (reading of parent stream),
updated reason messages,
moved check availability call under common try/except which handles errors during usual stream read,
moved log messages which indicate the start of the stream sync before the availability check, to make it easier to understand which stream is the source of errors

* why do we return here and not try next stream?

* fixed bug in CheckStream, now we try to check availability for all streams
2023-06-23 13:15:25 -04:00

49 lines
2.0 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
import traceback
from dataclasses import InitVar, dataclass
from typing import Any, List, Mapping, Tuple
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
from airbyte_cdk.sources.source import Source
from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy
@dataclass
class CheckStream(ConnectionChecker):
    """
    Checks the connection by checking the availability of one or many streams selected by the developer.

    Attributes:
        stream_names (List[str]): names of streams to check
        parameters (Mapping[str, Any]): init-only value, kept on the instance as ``_parameters``
    """

    stream_names: List[str]
    parameters: InitVar[Mapping[str, Any]]

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        # InitVar pseudo-fields are only passed to __post_init__ and are not stored
        # by the dataclass machinery, so keep an explicit private reference.
        self._parameters = parameters

    def check_connection(self, source: Source, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        """
        Check the availability of every stream listed in ``stream_names``.

        :param source: source whose ``streams(config)`` are inspected
        :param logger: logger used to report errors and passed to the availability strategy
        :param config: configuration forwarded to ``source.streams``
        :return: ``(True, None)`` when every configured stream is available; otherwise
            ``(False, reason)`` for the first stream that is unavailable or whose
            availability check raised
        :raises ValueError: if a configured stream name is not among the source's streams
        """
        streams = source.streams(config)
        if not streams:
            return False, f"No streams to connect to from source {source}"

        stream_name_to_stream = {s.name: s for s in streams}
        for stream_name in self.stream_names:
            if stream_name not in stream_name_to_stream:
                raise ValueError(f"{stream_name} is not part of the catalog. Expected one of {stream_name_to_stream.keys()}.")

            stream = stream_name_to_stream[stream_name]
            # Fall back to the HTTP strategy when the stream does not provide its own.
            availability_strategy = stream.availability_strategy or HttpAvailabilityStrategy()
            try:
                stream_is_available, reason = availability_strategy.check_availability(stream, logger, source)
                if not stream_is_available:
                    return False, reason
            except Exception as error:
                # Any failure while probing the stream is reported as a failed check
                # instead of propagating; the full traceback goes to the log.
                logger.error(f"Encountered an error trying to connect to stream {stream_name}. Error: \n {traceback.format_exc()}")
                return False, f"Unable to connect to stream {stream_name} - {error}"

        # Every requested stream was checked and reported available.
        return True, None