1
0
mirror of synced 2026-01-23 22:01:53 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_requester.py
2024-07-09 12:01:03 +02:00

310 lines
13 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
import os
from dataclasses import InitVar, dataclass
from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
from urllib.parse import urljoin
import requests
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
InterpolatedRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
from airbyte_cdk.sources.streams.http import HttpClient
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
from airbyte_cdk.utils.mapping_helpers import combine_mappings
@dataclass
class HttpRequester(Requester):
"""
Default implementation of a Requester
Attributes:
name (str): Name of the stream. Only used for request/response caching
url_base (Union[InterpolatedString, str]): Base url to send requests to
path (Union[InterpolatedString, str]): Path to send requests to
http_method (Union[str, HttpMethod]): HTTP method to use when sending requests
request_options_provider (Optional[InterpolatedRequestOptionsProvider]): request option provider defining the options to set on outgoing requests
authenticator (DeclarativeAuthenticator): Authenticator defining how to authenticate to the source
error_handler (Optional[ErrorHandler]): Error handler defining how to detect and handle errors
backoff_strategies (Optional[List[BackoffStrategy]]): List of backoff strategies to use when retrying requests
config (Config): The user-provided configuration as specified by the source's spec
use_cache (bool): Indicates that data should be cached for this stream
"""
name: str
url_base: Union[InterpolatedString, str]
path: Union[InterpolatedString, str]
config: Config
parameters: InitVar[Mapping[str, Any]]
authenticator: Optional[DeclarativeAuthenticator] = None
http_method: Union[str, HttpMethod] = HttpMethod.GET
request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
error_handler: Optional[ErrorHandler] = None
disable_retries: bool = False
message_repository: MessageRepository = NoopMessageRepository()
use_cache: bool = False
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
self._url_base = InterpolatedString.create(self.url_base, parameters=parameters)
self._path = InterpolatedString.create(self.path, parameters=parameters)
if self.request_options_provider is None:
self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, parameters=parameters)
elif isinstance(self.request_options_provider, dict):
self._request_options_provider = InterpolatedRequestOptionsProvider(config=self.config, **self.request_options_provider)
else:
self._request_options_provider = self.request_options_provider
self._authenticator = self.authenticator or NoAuth(parameters=parameters)
self._http_method = HttpMethod[self.http_method] if isinstance(self.http_method, str) else self.http_method
self.error_handler = self.error_handler
self._parameters = parameters
self.decoder = JsonDecoder(parameters={})
if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
backoff_strategies = self.error_handler.backoff_strategies
else:
backoff_strategies = None
self._http_client = HttpClient(
name=self.name,
logger=self.logger,
error_handler=self.error_handler,
authenticator=self._authenticator,
use_cache=self.use_cache,
backoff_strategy=backoff_strategies,
disable_retries=self.disable_retries,
message_repository=self.message_repository,
)
def get_authenticator(self) -> DeclarativeAuthenticator:
return self._authenticator
def get_url_base(self) -> str:
return os.path.join(self._url_base.eval(self.config), "")
def get_path(
self, *, stream_state: Optional[StreamState], stream_slice: Optional[StreamSlice], next_page_token: Optional[Mapping[str, Any]]
) -> str:
kwargs = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token}
path = str(self._path.eval(self.config, **kwargs))
return path.lstrip("/")
def get_method(self) -> HttpMethod:
return self._http_method
def get_request_params(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> MutableMapping[str, Any]:
return self._request_options_provider.get_request_params(
stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
)
def get_request_headers(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return self._request_options_provider.get_request_headers(
stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
)
# fixing request options provider types has a lot of dependencies
def get_request_body_data( # type: ignore
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Union[Mapping[str, Any], str]:
return (
self._request_options_provider.get_request_body_data(
stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
)
or {}
)
# fixing request options provider types has a lot of dependencies
def get_request_body_json( # type: ignore
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Optional[Mapping[str, Any]]:
return self._request_options_provider.get_request_body_json(
stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
)
@property
def logger(self) -> logging.Logger:
return logging.getLogger(f"airbyte.HttpRequester.{self.name}")
def _get_request_options(
self,
stream_state: Optional[StreamState],
stream_slice: Optional[StreamSlice],
next_page_token: Optional[Mapping[str, Any]],
requester_method: Callable[..., Optional[Union[Mapping[str, Any], str]]],
auth_options_method: Callable[..., Optional[Union[Mapping[str, Any], str]]],
extra_options: Optional[Union[Mapping[str, Any], str]] = None,
) -> Union[Mapping[str, Any], str]:
"""
Get the request_option from the requester, the authenticator and extra_options passed in.
Raise a ValueError if there's a key collision
Returned merged mapping otherwise
"""
return combine_mappings(
[
requester_method(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token),
auth_options_method(),
extra_options,
]
)
def _request_headers(
self,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
extra_headers: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
"""
Specifies request headers.
Authentication headers will overwrite any overlapping headers returned from this method.
"""
headers = self._get_request_options(
stream_state,
stream_slice,
next_page_token,
self.get_request_headers,
self.get_authenticator().get_auth_header,
extra_headers,
)
if isinstance(headers, str):
raise ValueError("Request headers cannot be a string")
return {str(k): str(v) for k, v in headers.items()}
def _request_params(
self,
stream_state: Optional[StreamState],
stream_slice: Optional[StreamSlice],
next_page_token: Optional[Mapping[str, Any]],
extra_params: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
"""
Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.
E.g: you might want to define query parameters for paging if next_page_token is not None.
"""
options = self._get_request_options(
stream_state, stream_slice, next_page_token, self.get_request_params, self.get_authenticator().get_request_params, extra_params
)
if isinstance(options, str):
raise ValueError("Request params cannot be a string")
for k, v in options.items():
if isinstance(v, (dict,)):
raise ValueError(f"Invalid value for `{k}` parameter. The values of request params cannot be an object.")
return options
def _request_body_data(
self,
stream_state: Optional[StreamState],
stream_slice: Optional[StreamSlice],
next_page_token: Optional[Mapping[str, Any]],
extra_body_data: Optional[Union[Mapping[str, Any], str]] = None,
) -> Optional[Union[Mapping[str, Any], str]]:
"""
Specifies how to populate the body of the request with a non-JSON payload.
If returns a ready text that it will be sent as is.
If returns a dict that it will be converted to a urlencoded form.
E.g. {"key1": "value1", "key2": "value2"} => "key1=value1&key2=value2"
At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden.
"""
# Warning: use self.state instead of the stream_state passed as argument!
return self._get_request_options(
stream_state,
stream_slice,
next_page_token,
self.get_request_body_data,
self.get_authenticator().get_request_body_data,
extra_body_data,
)
def _request_body_json(
self,
stream_state: Optional[StreamState],
stream_slice: Optional[StreamSlice],
next_page_token: Optional[Mapping[str, Any]],
extra_body_json: Optional[Mapping[str, Any]] = None,
) -> Optional[Mapping[str, Any]]:
"""
Specifies how to populate the body of the request with a JSON payload.
At the same time only one of the 'request_body_data' and 'request_body_json' functions can be overridden.
"""
# Warning: use self.state instead of the stream_state passed as argument!
options = self._get_request_options(
stream_state,
stream_slice,
next_page_token,
self.get_request_body_json,
self.get_authenticator().get_request_body_json,
extra_body_json,
)
if isinstance(options, str):
raise ValueError("Request body json cannot be a string")
return options
@classmethod
def _join_url(cls, url_base: str, path: str) -> str:
return urljoin(url_base, path)
def send_request(
self,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
path: Optional[str] = None,
request_headers: Optional[Mapping[str, Any]] = None,
request_params: Optional[Mapping[str, Any]] = None,
request_body_data: Optional[Union[Mapping[str, Any], str]] = None,
request_body_json: Optional[Mapping[str, Any]] = None,
log_formatter: Optional[Callable[[requests.Response], Any]] = None,
) -> Optional[requests.Response]:
request, response = self._http_client.send_request(
http_method=self.get_method().value,
url=self._join_url(
self.get_url_base(),
path or self.get_path(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token),
),
request_kwargs={},
headers=self._request_headers(stream_state, stream_slice, next_page_token, request_headers),
params=self._request_params(stream_state, stream_slice, next_page_token, request_params),
json=self._request_body_json(stream_state, stream_slice, next_page_token, request_body_json),
data=self._request_body_data(stream_state, stream_slice, next_page_token, request_body_data),
dedupe_query_params=True,
log_formatter=log_formatter,
)
return response