#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union

import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.declarative.extractors.http_extractor import HttpExtractor
from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
from airbyte_cdk.sources.declarative.requesters.requester import Requester
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
from airbyte_cdk.sources.declarative.states.state import State
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
from airbyte_cdk.sources.streams.http import HttpStream


class SimpleRetriever(Retriever, HttpStream):
    """
    Retriever that acts as a declarative HTTP stream by delegating each concern to a dedicated component:
    the Requester builds and configures the HTTP requests, the Paginator computes the next page token,
    the HttpExtractor parses records out of responses, the StreamSlicer produces the stream slices,
    and the State object is updated as records are read.
    """

    def __init__(
        self,
        name,
        primary_key,
        requester: Requester,
        paginator: Paginator,
        extractor: HttpExtractor,
        stream_slicer: StreamSlicer,
        state: State,
    ):
        self._name = name
        self._primary_key = primary_key
        self._paginator = paginator
        self._requester = requester
        self._extractor = extractor
        super().__init__(self._requester.get_authenticator())
        self._iterator: StreamSlicer = stream_slicer
        self._state: State = state.deep_copy()
        self._last_response = None
        self._last_records = None

    @property
    def name(self) -> str:
        """
        :return: Stream name
        """
        return self._name

    @property
    def url_base(self) -> str:
        return self._requester.get_url_base()

    @property
    def http_method(self) -> str:
        return str(self._requester.get_method().value)

    @property
    def raise_on_http_errors(self) -> bool:
        """
        If set to False, allows opting out of raising HTTP code exceptions.
        """
        return self._requester.raise_on_http_errors

    @property
    def max_retries(self) -> Union[int, None]:
        """
        Specifies the maximum number of retries for the backoff policy. Return None for no limit.
        """
        return self._requester.max_retries

    @property
    def retry_factor(self) -> float:
        """
        Specifies the factor to multiply the exponential backoff time by.
        """
        return self._requester.retry_factor

    def should_retry(self, response: requests.Response) -> bool:
        """
        Specifies conditions for backoff based on the response from the server.

        By default, back off on the following HTTP response statuses:
         - 429 (Too Many Requests) indicating rate limiting
         - 500s to handle transient server errors

        Unexpected but transient exceptions (connection timeout, DNS resolution failed, etc.) are retried by default.
        """
        return self._requester.should_retry(response)

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        """
        Specifies backoff time.

        This method is called only if should_retry() returns True for the input request.

        :param response:
        :return: how long to backoff in seconds. The return value may be a floating point number for subsecond precision.
            Returning None defers backoff to the default backoff behavior (e.g. using an exponential algorithm).
        """
        return self._requester.backoff_time(response)

    def request_headers(
        self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> Mapping[str, Any]:
        """
        Specifies request headers.
        Authentication headers will overwrite any overlapping headers returned from this method.
        """
        return self._requester.request_headers(stream_state, stream_slice, next_page_token)

    def request_body_data(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Union[Mapping, str]]:
        """
        Specifies how to populate the body of the request with a non-JSON payload.

        If it returns a string, it will be sent as is.
        If it returns a dict, it will be converted to a urlencoded form.
        E.g. {"key1": "value1", "key2": "value2"} => "key1=value1&key2=value2"

        Only one of 'request_body_data' and 'request_body_json' may be overridden.
        """
        return self._requester.request_body_data(stream_state, stream_slice, next_page_token)

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Optional[Mapping]:
        """
        Specifies how to populate the body of the request with a JSON payload.

        Only one of 'request_body_data' and 'request_body_json' may be overridden.
        """
        return self._requester.request_body_json(stream_state, stream_slice, next_page_token)

    def request_kwargs(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Mapping[str, Any]:
        """
        Specifies how to configure a mapping of keyword arguments to be used when creating the HTTP request.
        Any option listed in https://docs.python-requests.org/en/latest/api/#requests.adapters.BaseAdapter.send can be returned from
        this method. Note that these options do not conflict with request-level options such as headers, request params, etc.
        """
        return self._requester.request_kwargs(stream_state, stream_slice, next_page_token)

    def path(
        self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        return self._requester.get_path(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)

    def request_params(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> MutableMapping[str, Any]:
        """
        Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.

        E.g.: you might want to define query parameters for paging if next_page_token is not None.
        """
        return self._requester.request_params(stream_state, stream_slice, next_page_token)

    @property
    def cache_filename(self):
        """
        Return the name of the cache file.
        """
        return self._requester.cache_filename

    @property
    def use_cache(self):
        """
        If True, all records will be cached.
        """
        return self._requester.use_cache

    def parse_response(
        self,
        response: requests.Response,
        *,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Iterable[Mapping]:
        # Memoize the last response and the records extracted from it so that
        # next_page_token and read_records can refer to them.
        self._last_response = response
        records = self._extractor.extract_records(response)
        self._last_records = records
        return records

    @property
    def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
        return self._primary_key

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """
        Specifies a pagination strategy.

        The value returned from this method is passed to most other methods in this class. Use it to form a request e.g. set headers or query params.

        :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
        """
        return self._paginator.next_page_token(response, self._last_records)

    def read_records(
        self,
        sync_mode: SyncMode,
        cursor_field: List[str] = None,
        stream_slice: Mapping[str, Any] = None,
        stream_state: Mapping[str, Any] = None,
    ) -> Iterable[Mapping[str, Any]]:
        records_generator = HttpStream.read_records(self, sync_mode, cursor_field, stream_slice, stream_state)
        for r in records_generator:
            self._state.update_state(stream_slice=stream_slice, stream_state=stream_state, last_response=self._last_response, last_record=r)
            yield r
        else:
            # The for/else branch runs once the generator is exhausted (the loop has no break):
            # perform a final state update with the last response seen and no record.
            self._state.update_state(
                stream_slice=stream_slice, stream_state=stream_state, last_response=self._last_response, last_record=None
            )
            yield from []

    def stream_slices(
        self, *, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Specifies the slices for this stream. See the stream slicing section of the docs for more information.

        :param sync_mode:
        :param cursor_field:
        :param stream_state:
        :return:
        """
        # FIXME: this is not passing the cursor field because it is always known at init time
        return self._iterator.stream_slices(sync_mode, stream_state)

    def get_state(self) -> MutableMapping[str, Any]:
        return self._state.get_stream_state()
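

# A minimal usage sketch (not part of this module) showing how the components above are wired
# together. The component instances below are hypothetical placeholders; a real connector would
# supply concrete Requester, Paginator, HttpExtractor, StreamSlicer and State implementations.
#
#     retriever = SimpleRetriever(
#         name="users",                    # exposed through the `name` property
#         primary_key="id",                # exposed through the `primary_key` property
#         requester=my_requester,          # builds URL, headers, params and request bodies
#         paginator=my_paginator,          # computes `next_page_token` from the last response/records
#         extractor=my_extractor,          # turns an HTTP response into an iterable of records
#         stream_slicer=my_stream_slicer,  # yields the slices returned by `stream_slices`
#         state=my_state,                  # deep-copied at init and updated in `read_records`
#     )
#     for record in retriever.read_records(sync_mode=SyncMode.full_refresh):
#         print(record)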