* New connector_builder module for handling requests from the Connector Builder. Also implements `resolve_manifest` handler * Automated Commit - Formatting Changes * Rename ConnectorBuilderSource to ConnectorBuilderHandler * Update source_declarative_manifest README * Reorganize * read records * paste unit tests from connector builder server * compiles but tests fail * first test passes * Second test passes * 3rd test passes * one more test * another test * one more test * test * return StreamRead * test * test * rename * test * test * test * main seems to work * Update * Update * Update * Update * update * error message * rename * update * Update * CR improvements * fix test_source_declarative_manifest * fix tests * Update * Update * Update * Update * rename * rename * rename * format * Give connector_builder its own main.py * Update * reset * delete dead code * remove debug print * update test * Update * set right stream * Add --catalog argument * Remove unneeded preparse * Update README * handle error * tests pass * more explicit test * reset * format * fix merge * raise exception * fix * black format * raise with config * update * fix flake * __test_read_config is optional * fix * Automated Commit - Formatting Changes * fix * exclude_unset --------- Co-authored-by: Catherine Noll <noll.catherine@gmail.com> Co-authored-by: clnoll <clnoll@users.noreply.github.com> Co-authored-by: girarda <girarda@users.noreply.github.com>
91 lines
4.2 KiB
Python
91 lines
4.2 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
|
|
import logging
|
|
from abc import ABC, abstractmethod
|
|
from collections import defaultdict
|
|
from typing import Any, Generic, Iterable, List, Mapping, MutableMapping, TypeVar, Union
|
|
|
|
from airbyte_cdk.connector import BaseConnector, DefaultConnectorMixin, TConfig
|
|
from airbyte_cdk.models import AirbyteCatalog, AirbyteMessage, AirbyteStateMessage, AirbyteStateType, ConfiguredAirbyteCatalog
|
|
|
|
# Type variables for the state and catalog types that BaseSource is generic over.
TState = TypeVar("TState")
|
|
TCatalog = TypeVar("TCatalog")
|
|
|
|
|
|
class BaseSource(BaseConnector[TConfig], ABC, Generic[TConfig, TState, TCatalog]):
    """
    Abstract interface of a source connector, generic over the types used for its config, state and catalog.

    Every method declared here is abstract; concrete sources must implement all of them.
    """

    @abstractmethod
    def read_state(self, state_path: str) -> TState:
        """
        Loads the input state of a sync.

        :param state_path: The path from which the state should be read
        :return: The state, in the representation used by the concrete source
        """

    @abstractmethod
    def read_catalog(self, catalog_path: str) -> TCatalog:
        """
        Loads the catalog describing what should be read.

        :param catalog_path: The path from which the catalog should be read
        :return: The catalog, in the representation used by the concrete source
        """

    # The state annotation is spelled Union[TState, None] because the parameter defaults to None and Union is the
    # only one of the two equivalent spellings (Optional / Union) that this module imports.
    @abstractmethod
    def read(
        self, logger: logging.Logger, config: TConfig, catalog: TCatalog, state: Union[TState, None] = None
    ) -> Iterable[AirbyteMessage]:
        """
        Returns a generator of the AirbyteMessages generated by reading the source with the given configuration, catalog, and state.

        :param logger: Logger made available to the implementation
        :param config: The source configuration
        :param catalog: The catalog to read
        :param state: The input state; defaults to None when the caller supplies no state
        :return: An iterable of AirbyteMessages
        """

    @abstractmethod
    def discover(self, logger: logging.Logger, config: TConfig) -> AirbyteCatalog:
        """
        Returns an AirbyteCatalog representing the available streams and fields in this integration. For example, given valid credentials to a
        Postgres database, returns an Airbyte catalog where each postgres table is a stream, and each table column is a field.

        :param logger: Logger made available to the implementation
        :param config: The source configuration
        :return: The catalog of available streams
        """
|
|
|
|
|
|
class Source(
    DefaultConnectorMixin,
    BaseSource[Mapping[str, Any], Union[List[AirbyteStateMessage], MutableMapping[str, Any]], ConfiguredAirbyteCatalog],
    ABC,
):
    """
    Source whose config is a mapping, whose catalog is a ConfiguredAirbyteCatalog, and whose state is either a list of
    AirbyteStateMessage objects or a legacy mutable mapping.
    """

    # can be overridden to change an input state
    def read_state(self, state_path: str) -> Union[List[AirbyteStateMessage], MutableMapping[str, Any]]:
        """
        Retrieves the input state of a sync by reading from the specified JSON file. Incoming state can be deserialized into either
        a JSON object for legacy state input or as a list of AirbyteStateMessages for the per-stream state format.

        The shape of the result depends on the input:
          - a non-empty JSON list is parsed element by element and returned as a list of AirbyteStateMessage
          - any other non-empty value is treated as legacy state and passed to _emit_legacy_state_format()
          - a falsy state_path, or falsy file contents, is handled as an empty legacy state

        Note that the legacy path does not always yield a list: see _emit_legacy_state_format().

        :param state_path: The filepath to where the stream states are located
        :return: The complete stream state based on the connector's previous sync
        :raises ValueError: if an element of a per-stream state list has none of stream, data or global_ set
        """
        if not state_path:
            return self._emit_legacy_state_format({})

        state_obj = self._read_json_file(state_path)
        if not state_obj:
            return self._emit_legacy_state_format({})

        # Anything that is not a JSON array is the legacy (single object) state format
        if not isinstance(state_obj, list):
            return self._emit_legacy_state_format(state_obj)

        parsed_state_messages = []
        for state in state_obj:
            parsed_message = AirbyteStateMessage.parse_obj(state)
            # A usable state message carries at least one of these three payloads
            if not (parsed_message.stream or parsed_message.data or parsed_message.global_):
                raise ValueError("AirbyteStateMessage should contain either a stream, global, or state field")
            parsed_state_messages.append(parsed_message)
        return parsed_state_messages

    def _emit_legacy_state_format(self, state_obj: Mapping[str, Any]) -> Union[List[AirbyteStateMessage], MutableMapping[str, Any]]:
        """
        Existing connectors that override read() might not be able to interpret the new state format. We temporarily
        send state in the old format for these connectors, but once all have been upgraded, this method can be removed,
        and we can then emit state in the list format.

        :param state_obj: The legacy state object (possibly empty)
        :return: A defaultdict(dict) seeded with state_obj when the concrete class defines read() itself; otherwise a
            single-element list holding a LEGACY AirbyteStateMessage, or an empty list when state_obj is empty
        """
        # vars(self.__class__) only contains attributes defined directly on the concrete class, so a read() inherited
        # from a base class does not count as an override here
        if "read" in vars(self.__class__):
            return defaultdict(dict, state_obj)
        if state_obj:
            return [AirbyteStateMessage(type=AirbyteStateType.LEGACY, data=state_obj)]
        return []

    # can be overridden to change an input catalog
    @classmethod
    def read_catalog(cls, catalog_path: str) -> ConfiguredAirbyteCatalog:
        """
        Reads the JSON file at catalog_path and parses its contents into a ConfiguredAirbyteCatalog.

        :param catalog_path: The filepath of the configured catalog
        :return: The parsed ConfiguredAirbyteCatalog
        """
        return ConfiguredAirbyteCatalog.parse_obj(cls._read_json_file(catalog_path))
|