1
0
mirror of synced 2026-01-01 09:02:59 -05:00
Files
airbyte/airbyte-cdk/python/unit_tests/sources/streams/http/test_http.py
Brian Lai be01b476ce Add new InterpolatedRequestOptionsProvider that encapsulates all variations of request arguments (#13472)
* write out new request options provider and refactor components and parts of the YAML config

* fix formatting

* pr feedback to consolidate body_data_provider to simplify the code

* pr feedback get rid of extraneous optional
2022-06-21 16:01:05 -04:00

497 lines
18 KiB
Python

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import json
from http import HTTPStatus
from typing import Any, Iterable, Mapping, Optional
from unittest.mock import ANY, MagicMock, patch
import pytest
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
from airbyte_cdk.sources.streams.http.auth import NoAuth
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator as HttpTokenAuthenticator
from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException, RequestBodyException, UserDefinedBackoffException
from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator
class StubBasicReadHttpStream(HttpStream):
    """Minimal concrete ``HttpStream`` that the other stubs in this module extend.

    ``parse_response`` never looks at the response: each call yields a single
    ``{"data": n}`` record, where ``n`` is a per-instance counter starting at 1.
    """

    url_base = "https://test_base_url.com"
    primary_key = ""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Value reported by the next ``parse_response`` call.
        self.resp_counter = 1

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Always return ``None``, i.e. no further page."""
        return None

    def path(self, **kwargs) -> str:
        """Return an empty path."""
        return ""

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one ``{"data": <counter>}`` record and advance the counter."""
        current = self.resp_counter
        self.resp_counter = current + 1
        yield {"data": current}
def test_default_authenticator():
    """Without an explicit authenticator the stream exposes ``NoAuth`` and the session has no ``auth``."""
    default_stream = StubBasicReadHttpStream()

    assert isinstance(default_stream.authenticator, NoAuth)
    assert default_stream._session.auth is None
def test_requests_native_token_authenticator():
    """A requests-native ``TokenAuthenticator`` ends up on the session while ``authenticator`` remains ``NoAuth``."""
    native_auth = TokenAuthenticator("test-token")
    stream = StubBasicReadHttpStream(authenticator=native_auth)

    assert isinstance(stream.authenticator, NoAuth)
    assert isinstance(stream._session.auth, TokenAuthenticator)
def test_http_token_authenticator():
    """An ``HttpTokenAuthenticator`` is kept as ``authenticator`` and the session's ``auth`` stays unset."""
    http_auth = HttpTokenAuthenticator("test-token")
    stream = StubBasicReadHttpStream(authenticator=http_auth)

    assert isinstance(stream.authenticator, HttpTokenAuthenticator)
    assert stream._session.auth is None
def test_request_kwargs_used(mocker, requests_mock):
    """The mapping returned by ``request_kwargs`` is passed as keyword arguments to the session's ``send``."""
    stream = StubBasicReadHttpStream()
    expected_kwargs = {"cert": None, "proxies": "google.com"}
    mocker.patch.object(stream, "request_kwargs", return_value=expected_kwargs)
    # Spy that still delegates to the real ``send``, so the registered URI is actually hit.
    send_spy = mocker.patch.object(stream._session, "send", wraps=stream._session.send)
    requests_mock.register_uri("GET", stream.url_base)

    list(stream.read_records(sync_mode=SyncMode.full_refresh))

    send_spy.assert_any_call(ANY, **expected_kwargs)
    assert send_spy.call_count == 1
def test_stub_basic_read_http_stream_read_records(mocker):
    """A single read of the basic stub produces exactly one record, ``{"data": 1}``."""
    stream = StubBasicReadHttpStream()
    # A blank response is fine here: the stub's `parse_response` ignores the response anyway.
    mocker.patch.object(StubBasicReadHttpStream, "_send_request", return_value={})

    records = list(stream.read_records(SyncMode.full_refresh))

    assert [{"data": 1}] == records
class StubNextPageTokenHttpStream(StubBasicReadHttpStream):
    """Stub that hands out ``pages`` page tokens (``{"page": 0}``, ``{"page": 1}``, ...) and then ``None``."""

    # Index of the next token to hand out.
    current_page = 0

    def __init__(self, pages: int = 5):
        super().__init__()
        self._pages = pages

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return the next ``{"page": n}`` token, or ``None`` once ``pages`` tokens have been issued."""
        if self.current_page >= self._pages:
            return None
        token = {"page": self.current_page}
        self.current_page += 1
        return token
def test_next_page_token_is_input_to_other_methods(mocker):
    """Validates that the return value from next_page_token is passed into other methods that need it like request_params, headers, body, etc.."""
    pages = 5
    stream = StubNextPageTokenHttpStream(pages=pages)
    # A blank response is fine here: the stub's `parse_response` ignores the response anyway.
    mocker.patch.object(StubNextPageTokenHttpStream, "_send_request", return_value={})

    # Wrap every method of interest in a spy so we can later inspect the arguments it received.
    spied_names = ("request_params", "request_headers", "request_body_json")
    spies = {name: mocker.patch.object(stream, name, wraps=getattr(stream, name)) for name in spied_names}

    records = list(stream.read_records(SyncMode.full_refresh))

    # The first call in the pagination loop carries no token; with 5 pages it is
    # followed by the 5 tokens {"page": 0}, {"page": 1}, ..., {"page": 4}.
    expected_tokens = [None] + [{"page": i} for i in range(pages)]
    for spy in spies.values():
        for token in expected_tokens:
            # Each spied method must have been called once with this token.
            spy.assert_any_call(next_page_token=token, stream_slice=None, stream_state={})

    # One record for the initial request plus one per page token.
    expected = [{"data": n} for n in range(1, pages + 2)]
    assert expected == records
class StubBadUrlHttpStream(StubBasicReadHttpStream):
    """Basic stub whose ``url_base`` is the literal string ``"bad_url"``."""

    url_base = "bad_url"
def test_stub_bad_url_http_stream_read_records(mocker):
    """Reading from a stream with a bad ``url_base`` raises a ``requests`` ``RequestException``."""
    bad_stream = StubBadUrlHttpStream()

    with pytest.raises(requests.exceptions.RequestException):
        list(bad_stream.read_records(SyncMode.full_refresh))
class StubCustomBackoffHttpStream(StubBasicReadHttpStream):
    """Basic stub that overrides ``backoff_time`` with a constant."""

    def backoff_time(self, response: requests.Response) -> Optional[float]:
        """Return a fixed backoff of 0.5, whatever the response."""
        return 0.5
def test_stub_custom_backoff_http_stream(mocker):
    """A constant 429 with a custom backoff raises ``UserDefinedBackoffException`` after ``max_retries + 1`` sends."""
    # Neutralise sleeping so the retries do not slow the test down.
    mocker.patch("time.sleep", lambda x: None)
    stream = StubCustomBackoffHttpStream()
    throttled = requests.Response()
    throttled.status_code = 429
    send_mock = mocker.patch.object(requests.Session, "send", return_value=throttled)

    with pytest.raises(UserDefinedBackoffException):
        list(stream.read_records(SyncMode.full_refresh))

    expected_calls = stream.max_retries + 1
    assert send_mock.call_count == expected_calls
    # TODO(davin): Figure out how to assert calls.
@pytest.mark.parametrize("retries", [-20, -1, 0, 1, 2, 10])
def test_stub_custom_backoff_http_stream_retries(mocker, retries):
    """``max_retries`` controls how many sends happen before ``UserDefinedBackoffException`` surfaces.

    Non-positive values result in a single send; positive values in ``max_retries + 1`` sends.
    """
    mocker.patch("time.sleep", lambda x: None)

    class StubCustomBackoffHttpStreamRetries(StubCustomBackoffHttpStream):
        @property
        def max_retries(self):
            return retries

    stream = StubCustomBackoffHttpStreamRetries()
    throttled = requests.Response()
    throttled.status_code = HTTPStatus.TOO_MANY_REQUESTS
    send_mock = mocker.patch.object(requests.Session, "send", return_value=throttled)

    with pytest.raises(UserDefinedBackoffException, match="Request URL: https://test_base_url.com/, Response Code: 429") as excinfo:
        list(stream.read_records(SyncMode.full_refresh))

    # The exception carries both the request that was sent and the response received.
    raised = excinfo.value
    assert isinstance(raised.request, requests.PreparedRequest)
    assert isinstance(raised.response, requests.Response)

    expected_calls = 1 if retries <= 0 else stream.max_retries + 1
    assert send_mock.call_count == expected_calls
def test_stub_custom_backoff_http_stream_endless_retries(mocker):
    """With ``max_retries`` set to ``None`` the stream keeps retrying until the mocked responses run out."""
    mocker.patch("time.sleep", lambda x: None)

    class StubCustomBackoffHttpStreamRetries(StubCustomBackoffHttpStream):
        @property
        def max_retries(self):
            return None

    # Stand-in for "forever": the number of 429 responses the mock can serve.
    infinite_number = 20

    stream = StubCustomBackoffHttpStreamRetries()
    throttled = requests.Response()
    throttled.status_code = HTTPStatus.TOO_MANY_REQUESTS
    canned_responses = [throttled] * infinite_number
    send_mock = mocker.patch.object(requests.Session, "send", side_effect=canned_responses)

    # Once the side_effect list is exhausted, the read is expected to end in a RuntimeError.
    with pytest.raises(RuntimeError):
        list(stream.read_records(SyncMode.full_refresh))

    # Every canned response was consumed, plus the one call that found the list empty.
    assert send_mock.call_count == infinite_number + 1
@pytest.mark.parametrize("http_code", [400, 401, 403])
def test_4xx_error_codes_http_stream(mocker, http_code):
    """A 400, 401 or 403 response makes ``read_records`` raise ``HTTPError``."""
    stream = StubCustomBackoffHttpStream()
    error_response = requests.Response()
    error_response.status_code = http_code
    mocker.patch.object(requests.Session, "send", return_value=error_response)

    with pytest.raises(requests.exceptions.HTTPError):
        list(stream.read_records(SyncMode.full_refresh))
class AutoFailFalseHttpStream(StubBasicReadHttpStream):
    """Basic stub with ``raise_on_http_errors`` switched off and small retry settings."""

    raise_on_http_errors = False
    # Tests in this module expect ``max_retries + 1`` sends before a backoff exception surfaces.
    max_retries = 3
    retry_factor = 0.01
def test_raise_on_http_errors_off_429(mocker):
    """Even with ``raise_on_http_errors`` off, a constant 429 ends in ``DefaultBackoffException``."""
    stream = AutoFailFalseHttpStream()
    throttled = requests.Response()
    throttled.status_code = 429
    mocker.patch.object(requests.Session, "send", return_value=throttled)

    with pytest.raises(DefaultBackoffException, match="Request URL: https://test_base_url.com/, Response Code: 429"):
        list(stream.read_records(SyncMode.full_refresh))
@pytest.mark.parametrize("status_code", [500, 501, 503, 504])
def test_raise_on_http_errors_off_5xx(mocker, status_code):
    """With ``raise_on_http_errors`` off, 5xx responses are retried and end in ``DefaultBackoffException``."""
    stream = AutoFailFalseHttpStream()
    server_error = requests.Response()
    server_error.status_code = status_code
    send_mock = mocker.patch.object(requests.Session, "send", return_value=server_error)

    with pytest.raises(DefaultBackoffException):
        list(stream.read_records(SyncMode.full_refresh))

    expected_calls = stream.max_retries + 1
    assert send_mock.call_count == expected_calls
@pytest.mark.parametrize("status_code", [400, 401, 402, 403, 416])
def test_raise_on_http_errors_off_non_retryable_4xx(mocker, status_code):
    """With ``raise_on_http_errors`` off, these 4xx responses are returned by ``_send_request`` instead of raising."""
    stream = AutoFailFalseHttpStream()
    client_error = requests.Response()
    client_error.status_code = status_code
    mocker.patch.object(requests.Session, "send", return_value=client_error)

    # ``Session.send`` is mocked, so the first argument is only passed through.
    returned = stream._send_request(client_error, {})

    assert returned.status_code == status_code
@pytest.mark.parametrize(
    "error",
    (
        requests.exceptions.ConnectTimeout,
        requests.exceptions.ConnectionError,
        requests.exceptions.ChunkedEncodingError,
        requests.exceptions.ReadTimeout,
    ),
)
def test_raise_on_http_errors(mocker, error):
    """Transport-level ``requests`` exceptions are retried ``max_retries`` times and then propagate unchanged."""
    stream = AutoFailFalseHttpStream()
    failure = error()
    send_mock = mocker.patch.object(requests.Session, "send", side_effect=failure)

    with pytest.raises(error):
        list(stream.read_records(SyncMode.full_refresh))

    expected_calls = stream.max_retries + 1
    assert send_mock.call_count == expected_calls
class PostHttpStream(StubBasicReadHttpStream):
    """Basic stub that issues ``POST`` requests and yields the decoded JSON body as its only record."""

    http_method = "POST"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Returns response data as is"""
        payload = response.json()
        yield payload
class TestRequestBody:
    """Suite of different tests for request bodies"""

    # Bodies fed to the stream through the patched ``request_body_*`` methods.
    json_body = {"key": "value"}
    data_body = "key:value"
    form_body = {"key1": "value1", "key2": 1234}
    # What ``form_body`` is expected to look like on the wire.
    urlencoded_form_body = "key1=value1&key2=1234"

    def request2response(self, request, context):
        """``requests_mock`` text callback: echo the request body and Content-Type header as a JSON string."""
        echoed = {"body": request.text, "content_type": request.headers.get("Content-Type")}
        return json.dumps(echoed)

    def test_json_body(self, mocker, requests_mock):
        """A JSON body is sent with the ``application/json`` content type."""
        stream = PostHttpStream()
        mocker.patch.object(stream, "request_body_json", return_value=self.json_body)
        requests_mock.register_uri("POST", stream.url_base, text=self.request2response)

        records = list(stream.read_records(sync_mode=SyncMode.full_refresh))
        echoed = records[0]

        assert echoed["content_type"] == "application/json"
        assert json.loads(echoed["body"]) == self.json_body

    def test_text_body(self, mocker, requests_mock):
        """A plain string body is sent verbatim and without a Content-Type header."""
        stream = PostHttpStream()
        mocker.patch.object(stream, "request_body_data", return_value=self.data_body)
        requests_mock.register_uri("POST", stream.url_base, text=self.request2response)

        records = list(stream.read_records(sync_mode=SyncMode.full_refresh))
        echoed = records[0]

        assert echoed["content_type"] is None
        assert echoed["body"] == self.data_body

    def test_form_body(self, mocker, requests_mock):
        """A mapping passed as body data is sent URL-encoded as a form."""
        stream = PostHttpStream()
        mocker.patch.object(stream, "request_body_data", return_value=self.form_body)
        requests_mock.register_uri("POST", stream.url_base, text=self.request2response)

        records = list(stream.read_records(sync_mode=SyncMode.full_refresh))
        echoed = records[0]

        assert echoed["content_type"] == "application/x-www-form-urlencoded"
        assert echoed["body"] == self.urlencoded_form_body

    def test_text_json_body(self, mocker, requests_mock):
        """Checks that an exception is raised when both body functions are overridden."""
        stream = PostHttpStream()
        mocker.patch.object(stream, "request_body_data", return_value=self.data_body)
        mocker.patch.object(stream, "request_body_json", return_value=self.json_body)
        requests_mock.register_uri("POST", stream.url_base, text=self.request2response)

        with pytest.raises(RequestBodyException):
            list(stream.read_records(sync_mode=SyncMode.full_refresh))

    def test_body_for_all_methods(self, mocker, requests_mock):
        """Stream must send a body for GET/POST/PATCH/PUT methods only"""
        stream = PostHttpStream()
        # HTTP method -> whether a request body is expected to be sent.
        body_expected_by_method = {
            "POST": True,
            "PUT": True,
            "PATCH": True,
            "GET": True,
            "DELETE": False,
            "OPTIONS": False,
        }
        for http_method, expects_body in body_expected_by_method.items():
            stream.http_method = http_method
            mocker.patch.object(stream, "request_body_data", return_value=self.data_body)
            requests_mock.register_uri(http_method, stream.url_base, text=self.request2response)

            records = list(stream.read_records(sync_mode=SyncMode.full_refresh))
            echoed = records[0]

            if not expects_body:
                assert echoed["body"] is None
            else:
                assert echoed["body"] == self.data_body
class CacheHttpStream(StubBasicReadHttpStream):
    """Basic stub with ``use_cache`` turned on."""

    use_cache = True
class CacheHttpSubStream(HttpSubStream):
    """Minimal ``HttpSubStream`` bound to a parent stream; it parses no records and never paginates."""

    url_base = "https://example.com"
    primary_key = ""

    def __init__(self, parent):
        super().__init__(parent=parent)

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Return an empty list, whatever the response."""
        return []

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Always return ``None``, i.e. no further page."""
        return None

    def path(self, **kwargs) -> str:
        """Return an empty path."""
        return ""
def test_caching_filename():
    """The cache file name is the stream name followed by ``.yml``."""
    stream = CacheHttpStream()
    expected_name = f"{stream.name}.yml"

    assert stream.cache_filename == expected_name
def test_caching_cassettes_are_different():
    """Two instances of the same cached stream class do not share a ``cache_file``."""
    first, second = CacheHttpStream(), CacheHttpStream()

    assert first.cache_file != second.cache_file
def test_parent_attribute_exist():
    """A sub-stream exposes the stream it was built with as ``parent``."""
    parent = CacheHttpStream()
    child = CacheHttpSubStream(parent=parent)

    assert child.parent == parent
def test_cache_response(mocker):
    """Reading a cached stream leaves non-empty content in the file named by ``cache_filename``.

    NOTE(review): ``url_base`` is pointed at https://google.com/ and no HTTP mock is
    installed here, so this looks like it performs a real network request — confirm.
    """
    stream = CacheHttpStream()
    mocker.patch.object(stream, "url_base", "https://google.com/")

    list(stream.read_records(sync_mode=SyncMode.full_refresh))

    with open(stream.cache_filename, "r") as cache:
        cached_text = cache.read()
    assert cached_text
class CacheHttpStreamWithSlices(CacheHttpStream):
    """Cached stub that emits one slice per entry in ``paths`` and reports the response text length."""

    paths = ["", "search"]

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        """Return the slice's ``"path"`` value, or an empty path when no (or an empty) slice is given."""
        if not stream_slice:
            return ""
        return f'{stream_slice["path"]}'

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield ``{"path": p}`` for every ``p`` in ``paths``, in order."""
        yield from ({"path": entry} for entry in self.paths)

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield a single record holding the length of the response text."""
        text_length = len(response.text)
        yield {"value": text_length}
@patch("airbyte_cdk.sources.streams.core.logging", MagicMock())
def test_using_cache(mocker):
    """After the parent has been read, iterating the child's slices replays from the parent's cassette.

    NOTE(review): like ``test_cache_response`` this targets https://google.com/ without an
    HTTP mock, so the first pass presumably goes over the network — confirm.
    """
    parent_stream = CacheHttpStreamWithSlices()
    mocker.patch.object(parent_stream, "url_base", "https://google.com/")

    # First pass: read every parent slice.
    for parent_slice in parent_stream.stream_slices():
        list(parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=parent_slice))

    child_stream = CacheHttpSubStream(parent=parent_stream)

    # Second pass: exhaust the child's slices; the values themselves are not needed.
    for _child_slice in child_stream.stream_slices(sync_mode=SyncMode.full_refresh):
        pass

    # ``cassete`` (sic) is the attribute name used on the stream.
    assert parent_stream.cassete.play_count != 0
class AutoFailTrueHttpStream(StubBasicReadHttpStream):
    """Basic stub with ``raise_on_http_errors`` explicitly switched on."""

    raise_on_http_errors = True
@pytest.mark.parametrize("status_code", range(400, 600))
def test_send_raise_on_http_errors_logs(mocker, status_code):
    """With ``raise_on_http_errors`` on and retries disabled, every 4xx/5xx raises ``HTTPError`` and logs the response text.

    The checks run after the ``pytest.raises`` block. ``_send_request`` raises, so
    nothing placed after it inside the block would execute, and its return value is
    never bound. They therefore use the mocked response and the captured exception.
    """
    mocker.patch.object(AutoFailTrueHttpStream, "logger")
    # Force the non-retry path so the error surfaces on the first send.
    mocker.patch.object(AutoFailTrueHttpStream, "should_retry", mocker.Mock(return_value=False))
    stream = AutoFailTrueHttpStream()
    error_response = requests.Response()
    error_response.status_code = status_code
    mocker.patch.object(requests.Session, "send", return_value=error_response)

    # ``Session.send`` is mocked, so the first argument is only passed through.
    with pytest.raises(requests.exceptions.HTTPError) as excinfo:
        stream._send_request(error_response, {})

    stream.logger.error.assert_called_with(error_response.text)
    assert excinfo.value.response.status_code == status_code
@pytest.mark.parametrize(
    "api_response, expected_message",
    [
        ({"error": "something broke"}, "something broke"),
        ({"error": {"message": "something broke"}}, "something broke"),
        ({"error": "err-001", "message": "something broke"}, "something broke"),
        ({"failure": {"message": "something broke"}}, "something broke"),
        ({"error": {"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}}, "one, two, three"),
        ({"errors": ["one", "two", "three"]}, "one, two, three"),
        ({"messages": ["one", "two", "three"]}, "one, two, three"),
        ({"errors": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"),
        ({"error": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"),
        ({"errors": [{"error": "one"}, {"error": "two"}, {"error": "three"}]}, "one, two, three"),
        ({"failures": [{"message": "one"}, {"message": "two"}, {"message": "three"}]}, "one, two, three"),
        (["one", "two", "three"], "one, two, three"),
        ([{"error": "one"}, {"error": "two"}, {"error": "three"}], "one, two, three"),
        ({"error": True}, None),
        ({"something_else": "hi"}, None),
        ({}, None),
    ],
)
def test_default_parse_response_error_message(api_response: dict, expected_message: Optional[str]):
    """The default parser extracts a message from common error payload shapes, or ``None`` when there is none."""
    stream = StubBasicReadHttpStream()
    # Fake response whose ``json()`` returns the parametrized payload.
    fake_response = MagicMock(**{"json.return_value": api_response})

    parsed = stream.parse_response_error_message(fake_response)

    assert parsed == expected_message
def test_default_parse_response_error_message_not_json(requests_mock):
    """A response body that is not JSON yields no error message."""
    stream = StubBasicReadHttpStream()
    not_json_url = "mock://test.com/not_json"
    requests_mock.register_uri("GET", not_json_url, text="this is not json")

    plain_response = requests.get(not_json_url)

    assert stream.parse_response_error_message(plain_response) is None
def test_default_get_error_display_message_handles_http_error(mocker):
    """Only ``HTTPError`` instances get a display message; it comes from ``parse_response_error_message``."""
    stream = StubBasicReadHttpStream()
    mocker.patch.object(stream, "parse_response_error_message", return_value="my custom message")

    # A non-HTTP exception produces no display message.
    assert stream.get_error_display_message(RuntimeError("not me")) is None

    # An HTTP error is routed through the (patched) response parser.
    assert stream.get_error_display_message(requests.HTTPError()) == "my custom message"