1
0
Mirror of repository, synced 2026-02-03 01:02:02 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py
Alexandre Girard df01616951 [Issue #23497] Deduplicate query parameters for declarative connectors (#28550)
* remove duplicate param

* remove duplicate params

* fix some of the typing issues

* fix typing issues

* newline

* format

* Enable by default

* Add missing file

* refactor and remove flag

* none check

* move line of code

* fix typing in rate_limiting

* comment

* use typedef

* else branch

* format

* gate the feature

* rename test

* fix the test

* only dedupe if the values are the same

* Add some tests

* convert values to strings

* Document the change

* implement in requester too
2023-07-25 14:22:25 -07:00

91 lines
3.4 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
import sys
import time
from typing import Any, Callable, Mapping, Optional
import backoff
from requests import PreparedRequest, RequestException, Response, codes, exceptions
from .exceptions import DefaultBackoffException, UserDefinedBackoffException
TRANSIENT_EXCEPTIONS = (
DefaultBackoffException,
exceptions.ConnectTimeout,
exceptions.ReadTimeout,
exceptions.ConnectionError,
exceptions.ChunkedEncodingError,
)
logger = logging.getLogger("airbyte")
SendRequestCallableType = Callable[[PreparedRequest, Mapping[str, Any]], Response]
def default_backoff_handler(
    max_tries: Optional[int], factor: float, **kwargs: Any
) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """Build a decorator that retries a send-request callable with exponential backoff.

    Retries only the exceptions listed in ``TRANSIENT_EXCEPTIONS``; any other error
    propagates immediately. Non-429 4XX responses are treated as permanent failures
    and abort the retry loop.

    :param max_tries: maximum number of attempts (``None`` means retry indefinitely)
    :param factor: base multiplier for the exponential wait between attempts
    :param kwargs: extra keyword arguments forwarded to ``backoff.on_exception``
    :return: a decorator to wrap a ``(PreparedRequest, request_kwargs) -> Response`` callable
    """

    def log_retry_attempt(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        # NOTE: requests.Response.__bool__ is False for any 4XX/5XX status, and the
        # response attached to a retryable exception is always an error response, so a
        # plain truthiness check (`and exc.response`) would always skip this log line.
        # Compare against None explicitly instead.
        if isinstance(exc, RequestException) and exc.response is not None:
            logger.info(f"Status code: {exc.response.status_code}, Response Content: {exc.response.content}")
        logger.info(
            f"Caught retryable error '{str(exc)}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..."
        )

    def should_give_up(exc: Exception) -> bool:
        # If a non-rate-limiting related 4XX error makes it this far, it means it was unexpected and probably consistent, so we shouldn't back off
        if isinstance(exc, RequestException):
            give_up: bool = (
                exc.response is not None and exc.response.status_code != codes.too_many_requests and 400 <= exc.response.status_code < 500
            )
            if give_up:
                logger.info(f"Giving up for returned HTTP status: {exc.response.status_code}")
            return give_up

        # Only RequestExceptions are retryable, so if we get here, it's not retryable
        return False

    return backoff.on_exception(
        backoff.expo,
        TRANSIENT_EXCEPTIONS,
        jitter=None,
        on_backoff=log_retry_attempt,
        giveup=should_give_up,
        max_tries=max_tries,
        factor=factor,
        **kwargs,
    )
def user_defined_backoff_handler(max_tries: Optional[int], **kwargs: Any) -> Callable[[SendRequestCallableType], SendRequestCallableType]:
    """Build a decorator that retries on ``UserDefinedBackoffException`` using the
    connector-supplied backoff time carried on the exception (``exc.backoff``).

    The ``backoff`` library's own wait is disabled (``interval=0``); the actual sleep
    happens in the ``on_backoff`` handler so the connector-defined duration is honored.

    :param max_tries: maximum number of attempts (``None`` means retry indefinitely)
    :param kwargs: extra keyword arguments forwarded to ``backoff.on_exception``
    :return: a decorator to wrap a ``(PreparedRequest, request_kwargs) -> Response`` callable
    """

    def sleep_on_ratelimit(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        if isinstance(exc, UserDefinedBackoffException):
            # NOTE: requests.Response.__bool__ is False for any 4XX/5XX status (e.g. 429),
            # so a plain truthiness check (`if exc.response:`) would always skip this log
            # line for the rate-limited responses we are handling. Compare against None.
            if exc.response is not None:
                logger.info(f"Status code: {exc.response.status_code}, Response Content: {exc.response.content}")
            retry_after = exc.backoff
            logger.info(f"Retrying. Sleeping for {retry_after} seconds")
            time.sleep(retry_after + 1)  # extra second to cover any fractions of second

    def log_give_up(details: Mapping[str, Any]) -> None:
        _, exc, _ = sys.exc_info()
        if isinstance(exc, RequestException):
            logger.error(f"Max retry limit reached. Request: {exc.request}, Response: {exc.response}")
        else:
            logger.error("Max retry limit reached for unknown request and response")

    return backoff.on_exception(
        backoff.constant,
        UserDefinedBackoffException,
        interval=0,  # skip waiting, we'll wait in on_backoff handler
        on_backoff=sleep_on_ratelimit,
        on_giveup=log_give_up,
        jitter=None,
        max_tries=max_tries,
        **kwargs,
    )