refactor!(airbyte-cdk): remove availability strategy (#40682)

Signed-off-by: Artem Inzhyyants <artem.inzhyyants@gmail.com>
2026-01-31 19:01:59 -05:00 · 2024-07-17 11:55:51 +02:00
parent 536e8e21c8
commit 2bfb2264d9
10 changed files with 18 additions and 287 deletions
--- a/airbyte-cdk/python/airbyte_cdk/sources/concurrent_source/concurrent_source.py
+++ b/airbyte-cdk/python/airbyte_cdk/sources/concurrent_source/concurrent_source.py
@@ -4,9 +4,9 @@
 import concurrent
 import logging
 from queue import Queue
-from typing import Iterable, Iterator, List, Optional, Tuple
+from typing import Iterable, Iterator, List

-from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus
+from airbyte_cdk.models import AirbyteMessage
 from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor
 from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import PartitionGenerationCompletedSentinel
 from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException
@@ -19,7 +19,6 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partitio
 from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
 from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel, QueueItem
 from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
-from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message


 class ConcurrentSource:
@@ -81,15 +80,6 @@ class ConcurrentSource:
        streams: List[AbstractStream],
    ) -> Iterator[AirbyteMessage]:
        self._logger.info("Starting syncing")
-        stream_instances_to_read_from, not_available_streams = self._get_streams_to_read_from(streams)
-
-        for stream, message in not_available_streams:
-            self._logger.warning(f"Skipped syncing stream '{stream.name}' because it was unavailable. {message}")
-            yield stream_status_as_airbyte_message(stream, AirbyteStreamStatus.INCOMPLETE)
-
-        # Return early if there are no streams to read from
-        if not stream_instances_to_read_from:
-            return

        # We set a maxsize to for the main thread to process record items when the queue size grows. This assumes that there are less
        # threads generating partitions that than are max number of workers. If it weren't the case, we could have threads only generating
@@ -97,7 +87,7 @@ class ConcurrentSource:
        # information and might even need to be configurable depending on the source
        queue: Queue[QueueItem] = Queue(maxsize=10_000)
        concurrent_stream_processor = ConcurrentReadProcessor(
-            stream_instances_to_read_from,
+            streams,
            PartitionEnqueuer(queue, self._threadpool),
            self._threadpool,
            self._logger,
@@ -155,22 +145,3 @@ class ConcurrentSource:
            yield from concurrent_stream_processor.on_record(queue_item)
        else:
            raise ValueError(f"Unknown queue item type: {type(queue_item)}")
-
-    def _get_streams_to_read_from(
-        self, streams: List[AbstractStream]
-    ) -> Tuple[List[AbstractStream], List[Tuple[AbstractStream, Optional[str]]]]:
-        """
-        Iterate over the configured streams and return a list of streams to read from.
-        If a stream is not configured, it will be skipped.
-        If a stream is configured but does not exist in the source and self.raise_exception_on_missing_stream is True, an exception will be raised
-        If a stream is not available, it will be skipped
-        """
-        stream_instances_to_read_from = []
-        not_available_streams = []
-        for stream in streams:
-            stream_availability = stream.check_availability()
-            if not stream_availability.is_available():
-                not_available_streams.append((stream, stream_availability.message()))
-                continue
-            stream_instances_to_read_from.append(stream)
-        return stream_instances_to_read_from, not_available_streams