* [ISSUE #20771] limiting the number of requests performed to the backend without flag * [ISSUE #20771] code reviewing my own code * [ISSUE #20771] adding ABC to paginator * [ISSUE #20771] format code * [ISSUE #20771] adding slices to connector builder read request (#21605) * [ISSUE #20771] adding slices to connector builder read request * [ISSUE #20771] formatting * [ISSUE #20771] set flag when limit requests reached (#21619) * [ISSUE #20771] set flag when limit requests reached * [ISSUE #20771] assert proper value on test read objects __init__ * [ISSUE #20771] code review and fix edge case * [ISSUE #20771] fix flake8 error * [ISSUE #20771] code review * 🤖 Bump minor version of Airbyte CDK * to run the CI
284 lines
14 KiB
Python
284 lines
14 KiB
Python
#
|
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
import inspect
|
|
import json
|
|
import logging
|
|
import pkgutil
|
|
import typing
|
|
from dataclasses import dataclass, fields
|
|
from enum import Enum, EnumMeta
|
|
from typing import Any, Iterator, List, Mapping, MutableMapping, Union
|
|
|
|
import yaml
|
|
from airbyte_cdk.models import (
|
|
AirbyteConnectionStatus,
|
|
AirbyteMessage,
|
|
AirbyteStateMessage,
|
|
ConfiguredAirbyteCatalog,
|
|
ConnectorSpecification,
|
|
)
|
|
from airbyte_cdk.sources.declarative.checks import CheckStream
|
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
|
from airbyte_cdk.sources.declarative.exceptions import InvalidConnectorDefinitionException
|
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import CheckStream as CheckStreamModel
|
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel
|
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
|
from airbyte_cdk.sources.declarative.parsers.factory import DeclarativeComponentFactory
|
|
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import ManifestComponentTransformer
|
|
from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import ManifestReferenceResolver
|
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory
|
|
from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
|
from airbyte_cdk.sources.streams.core import Stream
|
|
from dataclasses_jsonschema import JsonSchemaMixin
|
|
from jsonschema.exceptions import ValidationError
|
|
from jsonschema.validators import validate
|
|
|
|
|
|
@dataclass
|
|
class ConcreteDeclarativeSource(JsonSchemaMixin):
|
|
version: str
|
|
check: CheckStream
|
|
streams: List[DeclarativeStream]
|
|
|
|
|
|
class ManifestDeclarativeSource(DeclarativeSource):
|
|
"""Declarative source defined by a manifest of low-code components that define source connector behavior"""
|
|
|
|
VALID_TOP_LEVEL_FIELDS = {"check", "definitions", "schemas", "spec", "streams", "type", "version"}
|
|
|
|
def __init__(
|
|
self,
|
|
source_config: ConnectionDefinition,
|
|
debug: bool = False,
|
|
component_factory: ModelToComponentFactory = None,
|
|
construct_using_pydantic_models: bool = False,
|
|
):
|
|
"""
|
|
:param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector
|
|
:param debug(bool): True if debug mode is enabled
|
|
"""
|
|
self.logger = logging.getLogger(f"airbyte.{self.name}")
|
|
|
|
# Controls whether we build components using the manual handwritten schema and Pydantic models or the legacy flow
|
|
self.construct_using_pydantic_models = True
|
|
|
|
# For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
|
|
manifest = dict(source_config)
|
|
if "type" not in manifest:
|
|
manifest["type"] = "DeclarativeSource"
|
|
|
|
resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
|
|
propagated_source_config = ManifestComponentTransformer().propagate_types_and_options("", resolved_source_config, {})
|
|
self._new_source_config = propagated_source_config
|
|
self._legacy_source_config = resolved_source_config
|
|
self._debug = debug
|
|
self._legacy_factory = DeclarativeComponentFactory() # Legacy factory used to instantiate declarative components from the manifest
|
|
if component_factory:
|
|
self._constructor = component_factory
|
|
else:
|
|
self._constructor = (
|
|
ModelToComponentFactory()
|
|
) # New factory which converts the manifest to Pydantic models to construct components
|
|
|
|
self._validate_source()
|
|
|
|
# todo: The handwritten schema uses additionalProperties: false to validate, when we delete legacy path this can be removed
|
|
# Stopgap to protect the top-level namespace until it's validated through the schema
|
|
unknown_fields = [key for key in self._legacy_source_config.keys() if key not in self.VALID_TOP_LEVEL_FIELDS]
|
|
if unknown_fields:
|
|
raise InvalidConnectorDefinitionException(f"Found unknown top-level fields: {unknown_fields}")
|
|
|
|
@property
|
|
def resolved_manifest(self) -> Mapping[str, Any]:
|
|
return self._new_source_config
|
|
|
|
@property
|
|
def connection_checker(self) -> ConnectionChecker:
|
|
check = self._new_source_config["check"] if self.construct_using_pydantic_models else self._legacy_source_config["check"]
|
|
if "type" not in check:
|
|
check["type"] = "CheckStream"
|
|
if self.construct_using_pydantic_models:
|
|
check_stream = self._constructor.create_component(CheckStreamModel, check, dict())
|
|
if isinstance(check_stream, ConnectionChecker):
|
|
return check_stream
|
|
else:
|
|
raise ValueError(f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}")
|
|
else:
|
|
return self._legacy_factory.create_component(check, dict())(source=self)
|
|
|
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
|
source_config = self._new_source_config if self.construct_using_pydantic_models else self._legacy_source_config
|
|
self._emit_manifest_debug_message(extra_args={"source_name": self.name, "parsed_config": json.dumps(source_config)})
|
|
|
|
if self.construct_using_pydantic_models:
|
|
source_streams = [
|
|
self._constructor.create_component(DeclarativeStreamModel, stream_config, config)
|
|
for stream_config in self._stream_configs(source_config)
|
|
]
|
|
else:
|
|
source_streams = [
|
|
self._legacy_factory.create_component(stream_config, config, True)()
|
|
for stream_config in self._stream_configs(source_config)
|
|
]
|
|
|
|
for stream in source_streams:
|
|
# make sure the log level is always applied to the stream's logger
|
|
self._apply_log_level_to_stream_logger(self.logger, stream)
|
|
return source_streams
|
|
|
|
def spec(self, logger: logging.Logger) -> ConnectorSpecification:
|
|
"""
|
|
Returns the connector specification (spec) as defined in the Airbyte Protocol. The spec is an object describing the possible
|
|
configurations (e.g: username and password) which can be configured when running this connector. For low-code connectors, this
|
|
will first attempt to load the spec from the manifest's spec block, otherwise it will load it from "spec.yaml" or "spec.json"
|
|
in the project root.
|
|
"""
|
|
self._configure_logger_level(logger)
|
|
source_config = self._new_source_config if self.construct_using_pydantic_models else self._legacy_source_config
|
|
self._emit_manifest_debug_message(extra_args={"source_name": self.name, "parsed_config": json.dumps(source_config)})
|
|
|
|
spec = source_config.get("spec")
|
|
if spec:
|
|
if "type" not in spec:
|
|
spec["type"] = "Spec"
|
|
if self.construct_using_pydantic_models:
|
|
spec_component = self._constructor.create_component(SpecModel, spec, dict())
|
|
else:
|
|
spec_component = self._legacy_factory.create_component(spec, dict())()
|
|
return spec_component.generate_spec()
|
|
else:
|
|
return super().spec(logger)
|
|
|
|
def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
|
|
self._configure_logger_level(logger)
|
|
return super().check(logger, config)
|
|
|
|
def read(
|
|
self,
|
|
logger: logging.Logger,
|
|
config: Mapping[str, Any],
|
|
catalog: ConfiguredAirbyteCatalog,
|
|
state: Union[List[AirbyteStateMessage], MutableMapping[str, Any]] = None,
|
|
) -> Iterator[AirbyteMessage]:
|
|
self._configure_logger_level(logger)
|
|
yield from super().read(logger, config, catalog, state)
|
|
|
|
def _configure_logger_level(self, logger: logging.Logger):
|
|
"""
|
|
Set the log level to logging.DEBUG if debug mode is enabled
|
|
"""
|
|
if self._debug:
|
|
logger.setLevel(logging.DEBUG)
|
|
|
|
def _validate_source(self):
|
|
# Validates the connector manifest against the schema auto-generated from the low-code backend
|
|
full_config = {}
|
|
if "version" in self._legacy_source_config:
|
|
full_config["version"] = self._legacy_source_config["version"]
|
|
full_config["check"] = self._legacy_source_config["check"]
|
|
streams = [
|
|
self._legacy_factory.create_component(stream_config, {}, False)()
|
|
for stream_config in self._stream_configs(self._legacy_source_config)
|
|
]
|
|
if len(streams) > 0:
|
|
full_config["streams"] = streams
|
|
declarative_source_schema = ConcreteDeclarativeSource.json_schema()
|
|
|
|
try:
|
|
validate(full_config, declarative_source_schema)
|
|
except ValidationError as e:
|
|
raise ValidationError("Validation against auto-generated schema failed") from e
|
|
|
|
# Validates the connector manifest against the low-code component json schema
|
|
try:
|
|
raw_component_schema = pkgutil.get_data("airbyte_cdk", "sources/declarative/declarative_component_schema.yaml")
|
|
declarative_component_schema = yaml.load(raw_component_schema, Loader=yaml.SafeLoader)
|
|
except FileNotFoundError as e:
|
|
raise FileNotFoundError(f"Failed to read manifest component json schema required for validation: {e}")
|
|
|
|
try:
|
|
validate(self._new_source_config, declarative_component_schema)
|
|
except ValidationError as e:
|
|
raise ValidationError("Validation against json schema defined in declarative_component_schema.yaml schema failed") from e
|
|
|
|
def _stream_configs(self, manifest: Mapping[str, Any]):
|
|
# This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config
|
|
stream_configs = manifest.get("streams", [])
|
|
for s in stream_configs:
|
|
if "type" not in s:
|
|
s["type"] = "DeclarativeStream"
|
|
return stream_configs
|
|
|
|
@staticmethod
|
|
def generate_schema() -> str:
|
|
expanded_source_manifest = ManifestDeclarativeSource.expand_schema_interfaces(ConcreteDeclarativeSource, {})
|
|
expanded_schema = expanded_source_manifest.json_schema()
|
|
return json.dumps(expanded_schema, cls=SchemaEncoder)
|
|
|
|
@staticmethod
|
|
def expand_schema_interfaces(expand_class: type, visited: dict) -> type:
|
|
"""
|
|
Recursive function that takes in class type that will have its interface fields unpacked and expended and then recursively
|
|
attempt the same expansion on all the class' underlying fields that are declarative component. It also performs expansion
|
|
with respect to interfaces that are contained within generic data types.
|
|
:param expand_class: The declarative component class that will have its interface fields expanded
|
|
:param visited: cache used to store a record of already visited declarative classes that have already been seen
|
|
:return: The expanded declarative component
|
|
"""
|
|
|
|
# Recursive base case to stop recursion if we have already expanded an interface in case of cyclical components
|
|
# like CompositeErrorHandler
|
|
if expand_class.__name__ in visited:
|
|
return visited[expand_class.__name__]
|
|
visited[expand_class.__name__] = expand_class
|
|
|
|
next_classes = []
|
|
class_fields = fields(expand_class)
|
|
for field in class_fields:
|
|
unpacked_field_types = DeclarativeComponentFactory.unpack(field.type)
|
|
expand_class.__annotations__[field.name] = unpacked_field_types
|
|
next_classes.extend(ManifestDeclarativeSource._get_next_expand_classes(field.type))
|
|
for next_class in next_classes:
|
|
ManifestDeclarativeSource.expand_schema_interfaces(next_class, visited)
|
|
return expand_class
|
|
|
|
@staticmethod
|
|
def _get_next_expand_classes(field_type) -> list[type]:
|
|
"""
|
|
Parses through a given field type and assembles a list of all underlying declarative components. For a concrete declarative class
|
|
it will return itself. For a declarative interface it will return its subclasses. For declarative components in a generic type
|
|
it will return the unpacked classes. Any non-declarative types will be skipped.
|
|
:param field_type: A field type that
|
|
:return:
|
|
"""
|
|
generic_type = typing.get_origin(field_type)
|
|
if generic_type is None:
|
|
# We can only continue parsing declarative that inherit from the JsonSchemaMixin class because it is used
|
|
# to generate the final json schema
|
|
if inspect.isclass(field_type) and issubclass(field_type, JsonSchemaMixin) and not isinstance(field_type, EnumMeta):
|
|
subclasses = field_type.__subclasses__()
|
|
if subclasses:
|
|
return subclasses
|
|
else:
|
|
return [field_type]
|
|
elif generic_type == list or generic_type == Union:
|
|
next_classes = []
|
|
for underlying_type in typing.get_args(field_type):
|
|
next_classes.extend(ManifestDeclarativeSource._get_next_expand_classes(underlying_type))
|
|
return next_classes
|
|
return []
|
|
|
|
def _emit_manifest_debug_message(self, extra_args: dict):
|
|
self.logger.debug("declarative source created from manifest", extra=extra_args)
|
|
|
|
|
|
class SchemaEncoder(json.JSONEncoder):
|
|
def default(self, obj):
|
|
if isinstance(obj, property) or isinstance(obj, Enum):
|
|
return str(obj)
|
|
return json.JSONEncoder.default(self, obj)
|