1
0
mirror of synced 2025-12-31 06:05:12 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/source.py
Alexandre Girard bb5741a0c0 Connector builder: support for test read with message grouping per slices (#23925)
* New connector_builder module for handling requests from the Connector Builder.

Also implements `resolve_manifest` handler

* Automated Commit - Formatting Changes

* Rename ConnectorBuilderSource to ConnectorBuilderHandler

* Update source_declarative_manifest README

* Reorganize

* read records

* paste unit tests from connector builder server

* compiles but tests fail

* first test passes

* Second test passes

* 3rd test passes

* one more test

* another test

* one more test

* test

* return StreamRead

* test

* test

* rename

* test

* test

* test

* main seems to work

* Update

* Update

* Update

* Update

* update

* error message

* rename

* update

* Update

* CR improvements

* fix test_source_declarative_manifest

* fix tests

* Update

* Update

* Update

* Update

* rename

* rename

* rename

* format

* Give connector_builder its own main.py

* Update

* reset

* delete dead code

* remove debug print

* update test

* Update

* set right stream

* Add --catalog argument

* Remove unneeded preparse

* Update README

* handle error

* tests pass

* more explicit test

* reset

* format

* fix merge

* raise exception

* fix

* black format

* raise with config

* update

* fix flake

* __test_read_config is optional

* fix

* Automated Commit - Formatting Changes

* fix

* exclude_unset

---------

Co-authored-by: Catherine Noll <noll.catherine@gmail.com>
Co-authored-by: clnoll <clnoll@users.noreply.github.com>
Co-authored-by: girarda <girarda@users.noreply.github.com>
2023-03-15 17:12:37 -07:00

91 lines
4.2 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Any, Generic, Iterable, List, Mapping, MutableMapping, TypeVar, Union
from airbyte_cdk.connector import BaseConnector, DefaultConnectorMixin, TConfig
from airbyte_cdk.models import AirbyteCatalog, AirbyteMessage, AirbyteStateMessage, AirbyteStateType, ConfiguredAirbyteCatalog
# Type of the state object that BaseSource.read_state() returns and BaseSource.read() accepts.
TState = TypeVar("TState")
# Type of the catalog object that BaseSource.read_catalog() returns and BaseSource.read() accepts.
TCatalog = TypeVar("TCatalog")
class BaseSource(BaseConnector[TConfig], ABC, Generic[TConfig, TState, TCatalog]):
    """
    Abstract interface of a source, parameterized by its config (TConfig), state (TState) and catalog (TCatalog) types.

    Concrete subclasses must implement all four methods declared below.
    """

    @abstractmethod
    def read_state(self, state_path: str) -> TState:
        """
        Returns the state object (of type TState) for the given state path.

        :param state_path: Path identifying where the state is stored
        """

    @abstractmethod
    def read_catalog(self, catalog_path: str) -> TCatalog:
        """
        Returns the catalog object (of type TCatalog) for the given catalog path.

        :param catalog_path: Path identifying where the catalog is stored
        """

    @abstractmethod
    def read(self, logger: logging.Logger, config: TConfig, catalog: TCatalog, state: TState = None) -> Iterable[AirbyteMessage]:
        """
        Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state.

        :param logger: Logger made available to the implementation
        :param config: The connector configuration
        :param catalog: The catalog to read
        :param state: The input state; defaults to None when the caller does not supply one
        """

    @abstractmethod
    def discover(self, logger: logging.Logger, config: TConfig) -> AirbyteCatalog:
        """
        Returns an AirbyteCatalog representing the available streams and fields in this integration. For example, given valid
        credentials to a Postgres database, returns an Airbyte catalog where each postgres table is a stream, and each table
        column is a field.

        :param logger: Logger made available to the implementation
        :param config: The connector configuration
        """
class Source(
    DefaultConnectorMixin,
    BaseSource[Mapping[str, Any], Union[List[AirbyteStateMessage], MutableMapping[str, Any]], ConfiguredAirbyteCatalog],
    ABC,
):
    """
    Base source whose config is a mapping, whose catalog is a ConfiguredAirbyteCatalog and whose state is either a list of
    AirbyteStateMessage objects or a legacy mutable mapping.
    """

    # can be overridden to change an input state
    def read_state(self, state_path: str) -> Union[List[AirbyteStateMessage], MutableMapping[str, Any]]:
        """
        Loads the input state of a sync from the JSON file at `state_path`.

        The file content may be a JSON list (per-stream state format), in which case every element is parsed into an
        AirbyteStateMessage, or any other JSON value (legacy state format), in which case it is handed to
        `_emit_legacy_state_format`. A falsy `state_path` or empty file content is treated as an empty legacy state.

        :param state_path: The filepath to where the stream states are located
        :return: A list of AirbyteStateMessage for per-stream input, otherwise whatever `_emit_legacy_state_format` produces
        :raises ValueError: If a per-stream entry has none of its stream, data, or global fields set
        """
        raw_state = self._read_json_file(state_path) if state_path else None

        # No path given, or the file held nothing meaningful: fall back to an empty legacy state.
        if not raw_state:
            return self._emit_legacy_state_format({})

        # Anything that is not a list is treated as the legacy state format.
        if not isinstance(raw_state, list):
            return self._emit_legacy_state_format(raw_state)

        state_messages = []
        for entry in raw_state:
            message = AirbyteStateMessage.parse_obj(entry)
            if not (message.stream or message.data or message.global_):
                raise ValueError("AirbyteStateMessage should contain either a stream, global, or state field")
            state_messages.append(message)
        return state_messages

    def _emit_legacy_state_format(self, state_obj) -> Union[List[AirbyteStateMessage], MutableMapping[str, Any]]:
        """
        Wraps a legacy state object in the shape the concrete connector can consume.

        Connectors that override read() themselves might not understand the list-based state format, so they keep
        receiving the old mapping format. All other connectors get the state as a list of AirbyteStateMessage (empty
        when there is no state). Once every connector has been upgraded this method can be removed.

        :param state_obj: The legacy state content
        :return: A defaultdict(dict) seeded with `state_obj`, or a list holding at most one LEGACY AirbyteStateMessage
        """
        # vars(self.__class__) only lists attributes defined directly on the concrete class, so this is true
        # only when that class itself overrides read()
        overrides_read = "read" in vars(self.__class__)
        if overrides_read:
            return defaultdict(dict, state_obj)

        if not state_obj:
            return []
        legacy_message = AirbyteStateMessage(type=AirbyteStateType.LEGACY, data=state_obj)
        return [legacy_message]

    # can be overridden to change an input catalog
    @classmethod
    def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
        """
        Loads the JSON file at `catalog_path` and parses it into a ConfiguredAirbyteCatalog.

        :param catalog_path: The filepath to where the configured catalog is located
        :return: The parsed ConfiguredAirbyteCatalog
        """
        raw_catalog = cls._read_json_file(catalog_path)
        return ConfiguredAirbyteCatalog.parse_obj(raw_catalog)