1
0
mirror of synced 2026-01-10 00:03:08 -05:00
Files
airbyte/airbyte-connector-builder-server/connector_builder/impl/default_api.py
Brian Lai 23679f5b41 instantiate a declarative connector and allow for reads to be invoked from the connector builder server (#19333)
* instantiate a declarative connector and allow for reads to be invoked from the connector builder server

* various pr feedback and cleaning up the code a bit

* refactor grouping logic into a separate function to illustrate how groups are being emitted

* fix the webapp to also pass config to the stream list endpoint

* fix dereference field

* replace error message handling with default FastAPI HTTPException

* pr feedback: more error messaging and some code reuse

* formatting

* regenerate open api spec
2022-11-15 12:15:58 -05:00

205 lines
10 KiB
Python

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import json
import logging
from json import JSONDecodeError
from typing import Any, Dict, Iterable, Optional, Union
from urllib.parse import parse_qs, urljoin, urlparse
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type
from connector_builder.generated.apis.default_api_interface import DefaultApi
from connector_builder.generated.models.http_request import HttpRequest
from connector_builder.generated.models.http_response import HttpResponse
from connector_builder.generated.models.stream_read import StreamRead
from connector_builder.generated.models.stream_read_pages import StreamReadPages
from connector_builder.generated.models.stream_read_request_body import StreamReadRequestBody
from connector_builder.generated.models.stream_read_slices import StreamReadSlices
from connector_builder.generated.models.streams_list_read import StreamsListRead
from connector_builder.generated.models.streams_list_read_streams import StreamsListReadStreams
from connector_builder.generated.models.streams_list_request_body import StreamsListRequestBody
from connector_builder.impl.low_code_cdk_adapter import LowCodeSourceAdapter
from fastapi import Body, HTTPException
from jsonschema import ValidationError
class DefaultApiImpl(DefaultApi):
    """
    Implementation of the connector builder server's DefaultApi.

    Builds a LowCodeSourceAdapter from the manifest supplied in each request and uses it to list the
    available streams or to read a stream, translating the emitted Airbyte messages into API models.
    """

    logger = logging.getLogger("airbyte.connector-builder")

    async def get_manifest_template(self) -> str:
        """
        Returns a starter low code manifest

        :return: YAML text of a manifest with a single "customers" stream and an api_key based spec
        """
        # The template lines intentionally start at column 0: leading whitespace is part of the returned YAML.
        return """version: "0.1.0"
definitions:
  selector:
    extractor:
      field_pointer: []
  requester:
    url_base: "https://example.com"
    http_method: "GET"
    authenticator:
      type: BearerAuthenticator
      api_token: "{{ config['api_key'] }}"
  retriever:
    record_selector:
      $ref: "*ref(definitions.selector)"
    paginator:
      type: NoPagination
    requester:
      $ref: "*ref(definitions.requester)"
  base_stream:
    retriever:
      $ref: "*ref(definitions.retriever)"
  customers_stream:
    $ref: "*ref(definitions.base_stream)"
    $options:
      name: "customers"
      primary_key: "id"
      path: "/example"
streams:
  - "*ref(definitions.customers_stream)"
check:
  stream_names:
    - "customers"
spec:
  documentation_url: https://docsurl.com
  connection_specification:
    title: Source Name Spec # 'TODO: Replace this with the name of your source.'
    type: object
    required:
      - api_key
    additionalProperties: true
    properties:
      # 'TODO: This schema defines the configuration required for the source. This usually involves metadata such as database and/or authentication information.':
      api_key:
        type: string
        description: API Key
"""

    async def list_streams(self, streams_list_request_body: StreamsListRequestBody = Body(None, description="")) -> StreamsListRead:
        """
        Takes in a low code manifest and a config to resolve the list of streams that are available for testing

        :param streams_list_request_body: Input parameters to retrieve the list of available streams
        :return: Stream objects made up of a stream name and the HTTP URL it will send requests to
        :raises HTTPException: (400) when the manifest is invalid or the streams cannot be resolved
        """
        adapter = self._create_low_code_adapter(manifest=streams_list_request_body.manifest)
        try:
            stream_list_read = [
                StreamsListReadStreams(
                    name=http_stream.name,
                    url=urljoin(http_stream.url_base, http_stream.path()),
                )
                for http_stream in adapter.get_http_streams(streams_list_request_body.config)
            ]
        except Exception as error:
            raise HTTPException(
                status_code=400, detail=f"Could not list streams with with error: {self._error_detail(error)}"
            ) from error
        return StreamsListRead(streams=stream_list_read)

    async def read_stream(self, stream_read_request_body: StreamReadRequestBody = Body(None, description="")) -> StreamRead:
        """
        Using the provided manifest and config, invokes a sync for the specified stream and returns groups of Airbyte messages
        that are produced during the read operation

        :param stream_read_request_body: Input parameters to trigger the read operation for a stream
        :return: Airbyte record messages produced by the sync grouped by slice and page
        :raises HTTPException: (400) when the manifest is invalid or the read fails
        """
        adapter = self._create_low_code_adapter(manifest=stream_read_request_body.manifest)
        # All pages are currently reported under one slice.
        single_slice = StreamReadSlices(pages=[])
        log_messages = []
        try:
            messages = adapter.read_stream(stream_read_request_body.stream, stream_read_request_body.config)
            for message_group in self._get_message_groups(messages):
                if isinstance(message_group, AirbyteLogMessage):
                    log_messages.append({"message": message_group.message})
                else:
                    single_slice.pages.append(message_group)
        except Exception as error:
            # TODO: We're temporarily using FastAPI's default exception model. Ideally we should use exceptions defined in the OpenAPI spec
            raise HTTPException(
                status_code=400, detail=f"Could not perform read with with error: {self._error_detail(error)}"
            ) from error
        return StreamRead(logs=log_messages, slices=[single_slice])

    def _get_message_groups(self, messages: Iterable[AirbyteMessage]) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
        """
        Message groups are partitioned according to when request log messages are received. Subsequent response log messages
        and record messages belong to the prior request log message and when we encounter another request, append the latest
        message group.

        Messages received from the CDK read operation will always arrive in the following order:
        {type: LOG, log: {message: "request: ..."}}
        {type: LOG, log: {message: "response: ..."}}
        ... 0 or more record messages
        {type: RECORD, record: {data: ...}}
        {type: RECORD, record: {data: ...}}
        Repeats for each request/response made

        Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping

        :param messages: Airbyte messages produced by reading a stream
        :return: Generator of pages (request, response and records) interleaved with ungrouped log messages
        :raises ValueError: when a page is missing its request or its response
        """
        first_page = True
        current_records = []
        current_page_request: Optional[HttpRequest] = None
        current_page_response: Optional[HttpResponse] = None

        for message in messages:
            if message.type == Type.LOG:
                log_text = message.log.message
                if log_text.startswith("request:"):
                    if first_page:
                        first_page = False
                    else:
                        # A new request closes the page that was being accumulated.
                        yield self._create_page(current_page_request, current_page_response, current_records)
                        current_records = []
                        # Forget the emitted page's response so it cannot be attributed to the next request.
                        current_page_response = None
                    current_page_request = self._create_request_from_log_message(message.log)
                elif log_text.startswith("response:"):
                    current_page_response = self._create_response_from_log_message(message.log)
                else:
                    yield message.log
            elif message.type == Type.RECORD:
                current_records.append(message.record.data)

        # The loop never breaks, so this always runs once the input is exhausted and emits the last page.
        yield self._create_page(current_page_request, current_page_response, current_records)

    @staticmethod
    def _create_page(request: Optional[HttpRequest], response: Optional[HttpResponse], records: list) -> StreamReadPages:
        """
        Builds a page from an accumulated message group

        :param request: Request parsed for the page, or None when it is missing or could not be parsed
        :param response: Response parsed for the page, or None when it is missing or could not be parsed
        :param records: Record data collected for the page
        :return: The page model
        :raises ValueError: when either the request or the response is missing
        """
        if request is None or response is None:
            raise ValueError("Every message grouping should have at least one request and response")
        return StreamReadPages(request=request, response=response, records=records)

    @staticmethod
    def _error_detail(error: Exception) -> Any:
        """
        Picks the value to show for an error in an HTTP error detail

        :param error: The exception that was caught
        :return: The first argument of the exception, or its repr when it was raised without arguments
        """
        # Indexing args blindly would raise IndexError for argument-less exceptions and hide the real failure.
        return error.args[0] if error.args else repr(error)

    def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
        """
        Parses a "request:" log message into a request model

        :param log_message: Log message whose text after "request:" is a JSON object describing the request
        :return: The request with its query string split out into parameters, or None when the payload can't be parsed
        """
        # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
        # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
        # protocol change is worked on.
        raw_request = log_message.message.partition("request:")[2]
        try:
            request = json.loads(raw_request)
        except JSONDecodeError as error:
            self.logger.warning("Failed to parse log message into request object with error: %s", error)
            return None
        if not isinstance(request, dict):
            self.logger.warning(
                "Failed to parse log message into request object with error: %s",
                f"expected a JSON object but got {type(request).__name__}",
            )
            return None

        raw_url = request.get("url") or ""
        url = urlparse(raw_url)
        # A ParseResult is always truthy, so emptiness has to be checked on the raw string.
        # netloc (minus any "user:password@" prefix) keeps the port and IPv6 brackets that hostname drops.
        host = url.netloc.rpartition("@")[2]
        full_path = f"{url.scheme}://{host}{url.path}" if raw_url else ""
        parameters = parse_qs(url.query) or None
        return HttpRequest(url=full_path, headers=request.get("headers"), parameters=parameters, body=request.get("body"))

    def _create_response_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpResponse]:
        """
        Parses a "response:" log message into a response model

        :param log_message: Log message whose text after "response:" is a JSON object describing the response
        :return: The response with its body decoded from JSON, or None when the payload or its body can't be parsed
        """
        # TODO: As a temporary stopgap, the CDK emits response data as a log message string. Ideally this should come in the
        # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
        # protocol change is worked on.
        raw_response = log_message.message.partition("response:")[2]
        try:
            response = json.loads(raw_response)
            if not isinstance(response, dict):
                raise TypeError(f"expected a JSON object but got {type(response).__name__}")
            raw_body = response.get("body")
            # A missing or null body is treated as an empty JSON object.
            body = json.loads("{}" if raw_body is None else raw_body)
        except (JSONDecodeError, TypeError) as error:
            self.logger.warning("Failed to parse log message into response object with error: %s", error)
            return None
        return HttpResponse(status=response.get("status_code"), body=body, headers=response.get("headers"))

    @staticmethod
    def _create_low_code_adapter(manifest: Dict[str, Any]) -> LowCodeSourceAdapter:
        """
        Instantiates the adapter used to interact with the low code source described by a manifest

        :param manifest: The connector manifest
        :return: Adapter built from the manifest
        :raises HTTPException: (400) when the manifest fails validation
        """
        try:
            return LowCodeSourceAdapter(manifest=manifest)
        except ValidationError as error:
            # TODO: We're temporarily using FastAPI's default exception model. Ideally we should use exceptions defined in the OpenAPI spec
            raise HTTPException(status_code=400, detail=f"Invalid connector manifest with error: {error.message}") from error