#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import json
from http import HTTPStatus
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
import requests
import responses
from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException, UserDefinedBackoffException
from requests import HTTPError
from responses import matchers
from source_github import constants
from source_github.streams import (
Branches,
Collaborators,
Comments,
CommitCommentReactions,
CommitComments,
Commits,
Deployments,
IssueEvents,
IssueLabels,
IssueMilestones,
Organizations,
ProjectCards,
ProjectColumns,
Projects,
ProjectsV2,
PullRequestCommentReactions,
PullRequestCommits,
PullRequests,
PullRequestStats,
Releases,
Repositories,
RepositoryStats,
Reviews,
Stargazers,
Tags,
TeamMembers,
TeamMemberships,
Teams,
Users,
WorkflowJobs,
WorkflowRuns,
)
from source_github.utils import read_full_refresh
from .utils import ProjectsResponsesAPI, read_incremental
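
# Sleep durations expected from the default exponential backoff policy (5 seconds, doubling on each retry).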
DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80]
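
# A 500 response is retried with the default exponential backoff until retries are exhausted, then BaseBackoffException is raised.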
@responses.activate
@patch("time.sleep")
def test_internal_server_error_retry(time_mock):
args = {"authenticator": None, "repositories": ["airbytehq/airbyte"], "start_date": "start_date", "page_size_for_large_streams": 30}
stream = CommitCommentReactions(**args)
stream_slice = {"repository": "airbytehq/airbyte", "comment_id": "id"}
time_mock.reset_mock()
responses.add("GET", "https://api.github.com/repos/airbytehq/airbyte/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR)
with pytest.raises(BaseBackoffException):
list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice))
sleep_delays = [delay[0][0] for delay in time_mock.call_args_list]
assert sleep_delays == DEFAULT_BACKOFF_DELAYS
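
# backoff_time() honors Retry-After and X-RateLimit-Reset headers (with a 60-second floor) and returns None for plain server errors.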
@pytest.mark.parametrize(
("http_status", "response_headers", "expected_backoff_time"),
[
(HTTPStatus.BAD_GATEWAY, {}, None),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, None),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, None),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, 60),
(HTTPStatus.FORBIDDEN, {"Retry-After": "30"}, 60),
(HTTPStatus.FORBIDDEN, {"Retry-After": "120"}, 120),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804454"}, 60.0),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804724"}, 300.0),
],
)
@patch("time.time", return_value=1655804424.0)
def test_backoff_time(time_mock, http_status, response_headers, expected_backoff_time):
response_mock = MagicMock()
response_mock.status_code = http_status
response_mock.headers = response_headers
args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30}
stream = PullRequestCommentReactions(**args)
assert stream.backoff_time(response_mock) == expected_backoff_time
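
# should_retry() returns True for server errors, rate-limit 403s, and GraphQL RATE_LIMITED errors delivered with HTTP 200.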
@pytest.mark.parametrize(
("http_status", "response_headers", "text"),
[
(HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""),
(HTTPStatus.BAD_GATEWAY, {}, ""),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, ""),
],
)
def test_should_retry(http_status, response_headers, text):
stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30)
response_mock = MagicMock()
response_mock.status_code = http_status
response_mock.headers = response_headers
response_mock.text = text
response_mock.json = lambda: json.loads(text)
assert stream.should_retry(response_mock)
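
# A 403 with a Retry-After header is retried; the second attempt succeeds against the same URL.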
@responses.activate
@patch("time.sleep")
def test_retry_after(time_mock):
first_request = True
def request_callback(request):
nonlocal first_request
if first_request:
first_request = False
return (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "")
return (HTTPStatus.OK, {}, '{"login": "airbytehq"}')
responses.add_callback(
responses.GET,
"https://api.github.com/orgs/airbytehq",
callback=request_callback,
content_type="application/json",
)
stream = Organizations(organizations=["airbytehq"])
list(read_full_refresh(stream))
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/orgs/airbytehq?per_page=100"
assert responses.calls[1].request.url == "https://api.github.com/orgs/airbytehq?per_page=100"
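
# A GraphQL RATE_LIMITED error (HTTP 200) is retried after sleeping until the X-RateLimit-Reset time.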
@responses.activate
@patch("time.sleep")
@patch("time.time", return_value=1655804424.0)
def test_graphql_rate_limited(time_mock, sleep_mock):
response_objects = [
(
HTTPStatus.OK,
{"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655804724"},
json.dumps({"errors": [{"type": "RATE_LIMITED"}]}),
),
(
HTTPStatus.OK,
{"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655808324"},
json.dumps({"data": {"repository": None}}),
),
]
responses.add_callback(
responses.POST,
"https://api.github.com/graphql",
callback=lambda r: response_objects.pop(0),
content_type="application/json",
)
stream = PullRequestStats(repositories=["airbytehq/airbyte"], page_size_for_large_streams=30)
records = list(read_full_refresh(stream))
assert records == []
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/graphql"
assert responses.calls[1].request.url == "https://api.github.com/graphql"
assert sum([c[0][0] for c in sleep_mock.call_args_list]) > 300
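
# A 404 from the teams endpoint is tolerated: the stream yields no records.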
@responses.activate
def test_stream_teams_404():
organization_args = {"organizations": ["org_name"]}
stream = Teams(**organization_args)
responses.add(
"GET",
"https://api.github.com/orgs/org_name/teams",
status=requests.codes.NOT_FOUND,
json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/teams#list-teams"},
)
assert list(read_full_refresh(stream)) == []
assert len(responses.calls) == 1
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/teams?per_page=100"
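
# A persistent 502 is retried until retries are exhausted (6 calls in total) and the stream yields no records.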
@responses.activate
@patch("time.sleep")
def test_stream_teams_502(sleep_mock):
organization_args = {"organizations": ["org_name"]}
stream = Teams(**organization_args)
url = "https://api.github.com/orgs/org_name/teams"
responses.add(
method="GET",
url=url,
status=requests.codes.BAD_GATEWAY,
json={"message": "Server Error"},
)
assert list(read_full_refresh(stream)) == []
assert len(responses.calls) == 6
# Check that every retry hit the same URL.
assert set(call.request.url for call in responses.calls) == {f"{url}?per_page=100"}
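
# Organizations reads one record per configured organization.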
@responses.activate
def test_stream_organizations_read():
organization_args = {"organizations": ["org1", "org2"]}
stream = Organizations(**organization_args)
responses.add("GET", "https://api.github.com/orgs/org1", json={"id": 1})
responses.add("GET", "https://api.github.com/orgs/org2", json={"id": 2})
records = list(read_full_refresh(stream))
assert records == [{"id": 1}, {"id": 2}]
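
# Teams reads per organization and enriches each record with the parent organization.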
@responses.activate
def test_stream_teams_read():
organization_args = {"organizations": ["org1", "org2"]}
stream = Teams(**organization_args)
responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"id": 1}, {"id": 2}])
responses.add("GET", "https://api.github.com/orgs/org2/teams", json=[{"id": 3}])
records = list(read_full_refresh(stream))
assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/teams?per_page=100"
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/teams?per_page=100"
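
# Users reads organization members and enriches each record with the parent organization.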
@responses.activate
def test_stream_users_read():
organization_args = {"organizations": ["org1", "org2"]}
stream = Users(**organization_args)
responses.add("GET", "https://api.github.com/orgs/org1/members", json=[{"id": 1}, {"id": 2}])
responses.add("GET", "https://api.github.com/orgs/org2/members", json=[{"id": 3}])
records = list(read_full_refresh(stream))
assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/members?per_page=100"
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/members?per_page=100"
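
# A 404 from the repos endpoint is tolerated: the stream yields no records.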
@responses.activate
def test_stream_repositories_404():
organization_args = {"organizations": ["org_name"]}
stream = Repositories(**organization_args)
responses.add(
"GET",
"https://api.github.com/orgs/org_name/repos",
status=requests.codes.NOT_FOUND,
json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/repos#list-organization-repositories"},
)
assert list(read_full_refresh(stream)) == []
assert len(responses.calls) == 1
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
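
# A 401 with a personal access token raises HTTPError and logs a token-renewal hint.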
@responses.activate
def test_stream_repositories_401(caplog):
organization_args = {"organizations": ["org_name"], "access_token_type": constants.PERSONAL_ACCESS_TOKEN_TITLE}
stream = Repositories(**organization_args)
responses.add(
"GET",
"https://api.github.com/orgs/org_name/repos",
status=requests.codes.UNAUTHORIZED,
json={"message": "Bad credentials", "documentation_url": "https://docs.github.com/rest"},
)
with pytest.raises(HTTPError):
assert list(read_full_refresh(stream)) == []
assert len(responses.calls) == 1
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
assert "Personal Access Token renewal is required: Bad credentials" in caplog.messages
@responses.activate
def test_stream_repositories_read():
organization_args = {"organizations": ["org1", "org2"]}
stream = Repositories(**organization_args)
updated_at = "2020-01-01T00:00:00Z"
responses.add(
"GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1, "updated_at": updated_at}, {"id": 2, "updated_at": updated_at}]
)
responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3, "updated_at": updated_at}])
records = list(read_full_refresh(stream))
assert records == [
{"id": 1, "organization": "org1", "updated_at": updated_at},
{"id": 2, "organization": "org1", "updated_at": updated_at},
{"id": 3, "organization": "org2", "updated_at": updated_at},
]
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100&sort=updated&direction=desc"
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100&sort=updated&direction=desc"
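
# A 410 (projects disabled for the repository) is tolerated: the stream yields no records.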
@responses.activate
def test_stream_projects_disabled():
repository_args_with_start_date = {"start_date": "start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]}
stream = Projects(**repository_args_with_start_date)
responses.add(
"GET",
"https://api.github.com/repos/test_repo/projects",
status=requests.codes.GONE,
json={"message": "Projects are disabled for this repository", "documentation_url": "https://docs.github.com/v3/projects"},
)
assert list(read_full_refresh(stream)) == []
assert len(responses.calls) == 1
assert responses.calls[0].request.url == "https://api.github.com/repos/test_repo/projects?per_page=100&state=all"
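
# PullRequests reads in ascending order on the first sync and descending on later syncs, filtering by the updated_at cursor.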
@responses.activate
def test_stream_pull_requests_incremental_read():
page_size = 2
repository_args_with_start_date = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": page_size,
"start_date": "2022-02-02T10:10:03Z",
}
stream = PullRequests(**repository_args_with_start_date)
data = [
{"id": 1, "updated_at": "2022-02-02T10:10:02Z"},
{"id": 2, "updated_at": "2022-02-02T10:10:04Z"},
{"id": 3, "updated_at": "2022-02-02T10:10:06Z"},
{"id": 4, "updated_at": "2022-02-02T10:10:08Z"},
{"id": 5, "updated_at": "2022-02-02T10:10:10Z"},
{"id": 6, "updated_at": "2022-02-02T10:10:12Z"},
]
api_url = "https://api.github.com/repos/organization/repository/pulls"
responses.add(
"GET",
api_url,
json=data[0:2],
headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=2>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc"}, strict_match=False)],
)
responses.add(
"GET",
api_url,
json=data[2:4],
match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc", "page": "2"}, strict_match=False)],
)
responses.add(
"GET",
api_url,
json=data[5:3:-1],
headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=2>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc"}, strict_match=False)],
)
responses.add(
"GET",
api_url,
json=data[3:1:-1],
headers={"Link": '<https://api.github.com/repositories/400052213/pulls?page=3>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc", "page": "2"}, strict_match=False)],
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert [r["id"] for r in records] == [2, 3, 4]
assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:08Z"}}
records = read_incremental(stream, stream_state)
assert [r["id"] for r in records] == [6, 5]
assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:12Z"}}
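
# Commits keeps a per-branch created_at cursor and passes it to the API as the "since" parameter.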
@responses.activate
def test_stream_commits_incremental_read():
repository_args_with_start_date = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": 100,
"start_date": "2022-02-02T10:10:03Z",
}
default_branches = {"organization/repository": "master"}
branches_to_pull = {"organization/repository": ["branch"]}
stream = Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches)
stream.page_size = 2
data = [
{"sha": 1, "commit": {"author": {"date": "2022-02-02T10:10:02Z"}}},
{"sha": 2, "commit": {"author": {"date": "2022-02-02T10:10:04Z"}}},
{"sha": 3, "commit": {"author": {"date": "2022-02-02T10:10:06Z"}}},
{"sha": 4, "commit": {"author": {"date": "2022-02-02T10:10:08Z"}}},
{"sha": 5, "commit": {"author": {"date": "2022-02-02T10:10:10Z"}}},
{"sha": 6, "commit": {"author": {"date": "2022-02-02T10:10:12Z"}}},
{"sha": 7, "commit": {"author": {"date": "2022-02-02T10:10:14Z"}}},
]
api_url = "https://api.github.com/repos/organization/repository/commits"
responses.add(
"GET",
api_url,
json=data[0:3],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:03Z", "sha": "branch", "per_page": "2"}, strict_match=False)],
)
responses.add(
"GET",
api_url,
json=data[3:5],
headers={"Link": '<https://api.github.com/repos/organization/repository/commits?page=2>; rel="next"'},
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2"}, strict_match=False)],
)
responses.add(
"GET",
api_url,
json=data[5:7],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2", "page": "2"}, strict_match=False)],
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert [r["sha"] for r in records] == [2, 3]
assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:06Z"}}}
records = read_incremental(stream, stream_state)
assert [r["sha"] for r in records] == [4, 5, 6, 7]
assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:14Z"}}}
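
# PullRequestCommits fetches commits for parent pull requests updated after start_date and adds repository and pull_number.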
@responses.activate
def test_stream_pull_request_commits():
repository_args = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": 100,
}
repository_args_with_start_date = {**repository_args, "start_date": "2022-02-02T10:10:02Z"}
stream = PullRequestCommits(PullRequests(**repository_args_with_start_date), **repository_args)
responses.add(
"GET",
"https://api.github.com/repos/organization/repository/pulls",
json=[
{"id": 1, "updated_at": "2022-02-02T10:10:02Z", "number": 1},
{"id": 2, "updated_at": "2022-02-02T10:10:04Z", "number": 2},
{"id": 3, "updated_at": "2022-02-02T10:10:06Z", "number": 3},
],
)
responses.add(
"GET",
"https://api.github.com/repos/organization/repository/pulls/2/commits",
json=[{"sha": 1}, {"sha": 2}],
)
responses.add(
"GET",
"https://api.github.com/repos/organization/repository/pulls/3/commits",
json=[{"sha": 3}, {"sha": 4}],
)
records = list(read_full_refresh(stream))
assert records == [
{"sha": 1, "repository": "organization/repository", "pull_number": 2},
{"sha": 2, "repository": "organization/repository", "pull_number": 2},
{"sha": 3, "repository": "organization/repository", "pull_number": 3},
{"sha": 4, "repository": "organization/repository", "pull_number": 3},
]
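
# ProjectColumns keeps a per-project updated_at cursor; only columns updated after start_date or the stored cursor are emitted.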
@responses.activate
def test_stream_project_columns():
repository_args_with_start_date = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": 100,
"start_date": "2022-02-01T00:00:00Z",
}
data = [
{
"updated_at": "2022-01-01T10:00:00Z",
},
{
"updated_at": "2022-03-01T10:00:00Z",
"columns": [
{"updated_at": "2022-01-01T10:00:00Z"},
{"updated_at": "2022-03-01T09:00:00Z"},
{"updated_at": "2022-03-01T10:00:00Z"},
],
},
{
"updated_at": "2022-05-01T10:00:00Z",
"columns": [
{"updated_at": "2022-01-01T10:00:00Z"},
{"updated_at": "2022-05-01T10:00:00Z"},
],
},
]
ProjectsResponsesAPI.register(data)
projects_stream = Projects(**repository_args_with_start_date)
stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
stream_state = {}
records = read_incremental(stream, stream_state=stream_state)
assert records == [
{"id": 22, "name": "column_22", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T09:00:00Z"},
{"id": 23, "name": "column_23", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T10:00:00Z"},
{"id": 32, "name": "column_32", "project_id": 3, "repository": "organization/repository", "updated_at": "2022-05-01T10:00:00Z"},
]
assert stream_state == {
"organization/repository": {"2": {"updated_at": "2022-03-01T10:00:00Z"}, "3": {"updated_at": "2022-05-01T10:00:00Z"}}
}
data = [
{"updated_at": "2022-01-01T10:00:00Z"},
{
"updated_at": "2022-04-01T10:00:00Z",
"columns": [
{"updated_at": "2022-01-01T10:00:00Z"},
{"updated_at": "2022-03-01T09:00:00Z"},
{"updated_at": "2022-03-01T10:00:00Z"},
{"updated_at": "2022-04-01T10:00:00Z"},
],
},
{
"updated_at": "2022-05-01T10:00:00Z",
"columns": [
{"updated_at": "2022-01-01T10:00:00Z"},
{"updated_at": "2022-05-01T10:00:00Z"},
],
},
{
"updated_at": "2022-06-01T10:00:00Z",
"columns": [{"updated_at": "2022-06-01T10:00:00Z"}],
},
]
ProjectsResponsesAPI.register(data)
projects_stream._session.cache.clear()
stream._session.cache.clear()
records = read_incremental(stream, stream_state=stream_state)
assert records == [
{"id": 24, "name": "column_24", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T10:00:00Z"},
{"id": 41, "name": "column_41", "project_id": 4, "repository": "organization/repository", "updated_at": "2022-06-01T10:00:00Z"},
]
assert stream_state == {
"organization/repository": {
"2": {"updated_at": "2022-04-01T10:00:00Z"},
"3": {"updated_at": "2022-05-01T10:00:00Z"},
"4": {"updated_at": "2022-06-01T10:00:00Z"},
}
}
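
# ProjectCards walks projects -> columns -> cards and emits cards updated after start_date.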
@responses.activate
def test_stream_project_cards():
repository_args_with_start_date = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": 100,
"start_date": "2022-03-01T00:00:00Z",
}
projects_stream = Projects(**repository_args_with_start_date)
project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date)
stream = ProjectCards(project_columns_stream, **repository_args_with_start_date)
data = [
{
"updated_at": "2022-01-01T00:00:00Z",
},
{
"updated_at": "2022-06-01T00:00:00Z",
"columns": [
{
"updated_at": "2022-04-01T00:00:00Z",
"cards": [
{"updated_at": "2022-03-01T00:00:00Z"},
{"updated_at": "2022-04-01T00:00:00Z"},
],
},
{"updated_at": "2022-05-01T09:00:00Z"},
{
"updated_at": "2022-06-01T00:00:00Z",
"cards": [
{"updated_at": "2022-05-01T00:00:00Z"},
{"updated_at": "2022-06-01T00:00:00Z"},
],
},
],
},
{
"updated_at": "2022-05-01T00:00:00Z",
"columns": [
{"updated_at": "2022-01-01T00:00:00Z"},
{
"updated_at": "2022-05-01T00:00:00Z",
"cards": [
{"updated_at": "2022-02-01T00:00:00Z"},
{"updated_at": "2022-05-01T00:00:00Z"},
],
},
],
},
]
ProjectsResponsesAPI.register(data)
stream_state = {}
projects_stream._session.cache.clear()
project_columns_stream._session.cache.clear()
records = read_incremental(stream, stream_state=stream_state)
assert records == [
{
"column_id": 21,
"id": 212,
"name": "card_212",
"project_id": 2,
"repository": "organization/repository",
"updated_at": "2022-04-01T00:00:00Z",
},
{
"column_id": 23,
"id": 231,
"name": "card_231",
"project_id": 2,
"repository": "organization/repository",
"updated_at": "2022-05-01T00:00:00Z",
},
{
"column_id": 23,
"id": 232,
"name": "card_232",
"project_id": 2,
"repository": "organization/repository",
"updated_at": "2022-06-01T00:00:00Z",
},
{
"column_id": 32,
"id": 322,
"name": "card_322",
"project_id": 3,
"repository": "organization/repository",
"updated_at": "2022-05-01T00:00:00Z",
},
]
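
# Comments pages with the "since" parameter per repository and advances a per-repository updated_at cursor.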
@responses.activate
def test_stream_comments():
repository_args_with_start_date = {
"repositories": ["organization/repository", "airbytehq/airbyte"],
"page_size_for_large_streams": 2,
"start_date": "2022-02-02T10:10:01Z",
}
stream = Comments(**repository_args_with_start_date)
data = [
{"id": 1, "updated_at": "2022-02-02T10:10:02Z"},
{"id": 2, "updated_at": "2022-02-02T10:10:04Z"},
{"id": 3, "updated_at": "2022-02-02T10:12:06Z"},
{"id": 4, "updated_at": "2022-02-02T10:12:08Z"},
{"id": 5, "updated_at": "2022-02-02T10:12:10Z"},
{"id": 6, "updated_at": "2022-02-02T10:12:12Z"},
]
api_url = "https://api.github.com/repos/organization/repository/issues/comments"
responses.add(
"GET",
api_url,
json=data[0:2],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[1:3],
headers={
"Link": '<https://api.github.com/repos/organization/repository/issues/comments?per_page=2&since=2022-02-02T10%3A10%3A04Z&page=2>; rel="next"'
},
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[3:5],
headers={
"Link": '<https://api.github.com/repos/organization/repository/issues/comments?per_page=2&since=2022-02-02T10%3A10%3A04Z&page=3>; rel="next"'
},
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "2", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[5:],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "3", "per_page": "2"})],
)
data = [
{"id": 1, "updated_at": "2022-02-02T10:11:02Z"},
{"id": 2, "updated_at": "2022-02-02T10:11:04Z"},
{"id": 3, "updated_at": "2022-02-02T10:13:06Z"},
{"id": 4, "updated_at": "2022-02-02T10:13:08Z"},
{"id": 5, "updated_at": "2022-02-02T10:13:10Z"},
{"id": 6, "updated_at": "2022-02-02T10:13:12Z"},
]
api_url = "https://api.github.com/repos/airbytehq/airbyte/issues/comments"
responses.add(
"GET",
api_url,
json=data[0:2],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[1:3],
headers={
"Link": '<https://api.github.com/repos/airbytehq/airbyte/issues/comments?per_page=2&since=2022-02-02T10%3A11%3A04Z&page=2>; rel="next"'
},
match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[3:5],
headers={
"Link": '<https://api.github.com/repos/airbytehq/airbyte/issues/comments?per_page=2&since=2022-02-02T10%3A11%3A04Z&page=3>; rel="next"'
},
match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "2", "per_page": "2"})],
)
responses.add(
"GET",
api_url,
json=data[5:],
match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "3", "per_page": "2"})],
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert records == [
{"id": 1, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:02Z"},
{"id": 2, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:04Z"},
{"id": 1, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:02Z"},
{"id": 2, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:04Z"},
]
assert stream_state == {
"airbytehq/airbyte": {"updated_at": "2022-02-02T10:11:04Z"},
"organization/repository": {"updated_at": "2022-02-02T10:10:04Z"},
}
records = read_incremental(stream, stream_state)
assert records == [
{"id": 3, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:06Z"},
{"id": 4, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:08Z"},
{"id": 5, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:10Z"},
{"id": 6, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:12Z"},
{"id": 3, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:06Z"},
{"id": 4, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:08Z"},
{"id": 5, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:10Z"},
{"id": 6, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:12Z"},
]
assert stream_state == {
"airbytehq/airbyte": {"updated_at": "2022-02-02T10:13:12Z"},
"organization/repository": {"updated_at": "2022-02-02T10:12:12Z"},
}
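
# Full-refresh reads: streams with a start_date drop records at or before it, streams without one return everything,
# and Stargazers additionally copies user.id into user_id.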
@responses.activate
def test_streams_read_full_refresh():
repository_args = {
"repositories": ["organization/repository"],
"page_size_for_large_streams": 100,
}
repository_args_with_start_date = {**repository_args, "start_date": "2022-02-01T00:00:00Z"}
def get_json_response(cursor_field):
cursor_field = cursor_field or "updated_at"
return [
{"id": 1, cursor_field: "2022-02-01T00:00:00Z"},
{"id": 2, cursor_field: "2022-02-02T00:00:00Z"},
]
def get_records(cursor_field):
cursor_field = cursor_field or "updated_at"
return [
{"id": 1, cursor_field: "2022-02-01T00:00:00Z", "repository": "organization/repository"},
{"id": 2, cursor_field: "2022-02-02T00:00:00Z", "repository": "organization/repository"},
]
for cls, url in [
(Releases, "https://api.github.com/repos/organization/repository/releases"),
(IssueEvents, "https://api.github.com/repos/organization/repository/issues/events"),
(IssueMilestones, "https://api.github.com/repos/organization/repository/milestones"),
(CommitComments, "https://api.github.com/repos/organization/repository/comments"),
(Deployments, "https://api.github.com/repos/organization/repository/deployments"),
]:
stream = cls(**repository_args_with_start_date)
responses.add("GET", url, json=get_json_response(stream.cursor_field))
records = list(read_full_refresh(stream))
assert records == get_records(stream.cursor_field)[1:2]
for cls, url in [
(Tags, "https://api.github.com/repos/organization/repository/tags"),
(IssueLabels, "https://api.github.com/repos/organization/repository/labels"),
(Collaborators, "https://api.github.com/repos/organization/repository/collaborators"),
(Branches, "https://api.github.com/repos/organization/repository/branches"),
]:
stream = cls(**repository_args)
responses.add("GET", url, json=get_json_response(stream.cursor_field))
records = list(read_full_refresh(stream))
assert records == get_records(stream.cursor_field)
responses.add(
"GET",
"https://api.github.com/repos/organization/repository/stargazers",
json=[
{"starred_at": "2022-02-01T00:00:00Z", "user": {"id": 1}},
{"starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}},
],
)
stream = Stargazers(**repository_args_with_start_date)
records = list(read_full_refresh(stream))
assert records == [{"repository": "organization/repository", "starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}, "user_id": 2}]
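
# Reviews is read via GraphQL; the second incremental sync returns only reviews updated after the stored cursor.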
@responses.activate
def test_stream_reviews_incremental_read():
repository_args_with_start_date = {
"start_date": "2000-01-01T00:00:00Z",
"page_size_for_large_streams": 30,
"repositories": ["airbytehq/airbyte"],
}
stream = Reviews(**repository_args_with_start_date)
stream.page_size = 2
f = Path(__file__).parent / "graphql_reviews_responses.json"
response_objects = json.load(open(f))
def request_callback(request):
return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0)))
responses.add_callback(
responses.POST,
"https://api.github.com/graphql",
callback=request_callback,
content_type="application/json",
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert [r["id"] for r in records] == [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008]
assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:01Z"}}
assert len(responses.calls) == 4
responses.calls.reset()
records = read_incremental(stream, stream_state)
assert [r["id"] for r in records] == [1000, 1007, 1009]
assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:02Z"}}
assert len(responses.calls) == 4
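
# TeamMembers and TeamMemberships enrich records with organization and team_slug; a 404 membership is skipped with a log message.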
@responses.activate
def test_stream_team_members_full_refresh(caplog):
organization_args = {"organizations": ["org1"]}
repository_args = {"repositories": [], "page_size_for_large_streams": 100}
responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"slug": "team1"}, {"slug": "team2"}])
responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/members", json=[{"login": "login1"}, {"login": "login2"}])
responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login1", json={"username": "login1"})
responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login2", json={"username": "login2"})
responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/members", json=[{"login": "login2"}, {"login": "login3"}])
responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login2", json={"username": "login2"})
responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login3", status=requests.codes.NOT_FOUND)
teams_stream = Teams(**organization_args)
stream = TeamMembers(parent=teams_stream, **repository_args)
teams_stream._session.cache.clear()
records = list(read_full_refresh(stream))
assert records == [
{"login": "login1", "organization": "org1", "team_slug": "team1"},
{"login": "login2", "organization": "org1", "team_slug": "team1"},
{"login": "login2", "organization": "org1", "team_slug": "team2"},
{"login": "login3", "organization": "org1", "team_slug": "team2"}
]
stream = TeamMemberships(parent=stream, **repository_args)
records = list(read_full_refresh(stream))
assert records == [
{"username": "login1", "organization": "org1", "team_slug": "team1"},
{"username": "login2", "organization": "org1", "team_slug": "team1"},
{"username": "login2", "organization": "org1", "team_slug": "team2"},
]
expected_message = "Syncing `TeamMemberships` stream for organization `org1`, team `team2` and user `login3` isn't available: User has no team membership. Skipping..."
assert expected_message in caplog.messages
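
# CommitCommentReactions keeps a per-comment created_at cursor; the second sync emits only reactions created after it.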
@responses.activate
def test_stream_commit_comment_reactions_incremental_read():
repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100}
stream = CommitCommentReactions(**repository_args)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments",
json=[
{"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
{"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538825/reactions",
json=[
{"id": 154935429, "created_at": "2022-01-01T15:00:00Z"},
{"id": 154935430, "created_at": "2022-01-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
json=[{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}],
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert stream_state == {
"airbytehq/integration-test": {
"55538825": {"created_at": "2022-01-01T16:00:00Z"},
"55538826": {"created_at": "2022-01-01T17:00:00Z"},
}
}
assert records == [
{"id": 154935429, "comment_id": 55538825, "created_at": "2022-01-01T15:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935430, "comment_id": 55538825, "created_at": "2022-01-01T16:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935431, "comment_id": 55538826, "created_at": "2022-01-01T17:00:00Z", "repository": "airbytehq/integration-test"},
]
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments",
json=[
{"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
{"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
{"id": 55538827, "updated_at": "2022-02-01T15:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
json=[
{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"},
{"id": 154935432, "created_at": "2022-02-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538827/reactions",
json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}],
)
stream._parent_stream._session.cache.clear()
records = read_incremental(stream, stream_state)
assert records == [
{"id": 154935432, "comment_id": 55538826, "created_at": "2022-02-01T16:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935433, "comment_id": 55538827, "created_at": "2022-02-01T17:00:00Z", "repository": "airbytehq/integration-test"},
]
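
# WorkflowRuns pages newest-first; a second incremental sync emits only new runs and runs re-updated after the stored cursor.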
@responses.activate
def test_stream_workflow_runs_read_incremental(monkeypatch):
repository_args_with_start_date = {
"repositories": ["org/repos"],
"page_size_for_large_streams": 30,
"start_date": "2022-01-01T00:00:00Z",
}
monkeypatch.setattr(constants, "DEFAULT_PAGE_SIZE", 1)
stream = WorkflowRuns(**repository_args_with_start_date)
data = [
{"id": 4, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z", "repository": {"full_name": "org/repos"}},
{"id": 3, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z", "repository": {"full_name": "org/repos"}},
{"id": 2, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z", "repository": {"full_name": "org/repos"}},
{"id": 1, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z", "repository": {"full_name": "org/repos"}},
]
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[0:1]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=2>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[1:2]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=3>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[2:3]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=4>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[3:4]},
match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)],
)
state = {}
records = read_incremental(stream, state)
assert state == {"org/repos": {"updated_at": "2022-02-05T00:00:00Z"}}
assert records == [
{"id": 4, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z"},
{"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z"},
{"id": 2, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z"},
{"id": 1, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z"},
]
assert len(responses.calls) == 4
data.insert(
0,
{
"id": 5,
"created_at": "2022-02-07T00:00:00Z",
"updated_at": "2022-02-07T00:00:00Z",
"repository": {"full_name": "org/repos"},
},
)
data[2]["updated_at"] = "2022-02-08T00:00:00Z"
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[0:1]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=2>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[1:2]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=3>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[2:3]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=4>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)],
)
responses.add(
"GET",
"https://api.github.com/repos/org/repos/actions/runs",
json={"total_count": len(data), "workflow_runs": data[3:4]},
headers={"Link": '<https://api.github.com/repositories/283046497/actions/runs?per_page=1&page=5>; rel="next"'},
match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)],
)
responses.calls.reset()
records = read_incremental(stream, state)
assert state == {"org/repos": {"updated_at": "2022-02-08T00:00:00Z"}}
assert records == [
{"id": 5, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-07T00:00:00Z", "updated_at": "2022-02-07T00:00:00Z"},
{"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-02-08T00:00:00Z"},
]
assert len(responses.calls) == 4
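
# WorkflowJobs reads jobs per parent run, skipping jobs that are not completed or were completed before the cursor.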
@responses.activate
def test_stream_workflow_jobs_read():
repository_args = {
"repositories": ["org/repo"],
"page_size_for_large_streams": 100,
}
repository_args_with_start_date = {**repository_args, "start_date": "2022-09-02T09:05:00Z"}
workflow_runs_stream = WorkflowRuns(**repository_args_with_start_date)
stream = WorkflowJobs(workflow_runs_stream, **repository_args_with_start_date)
workflow_runs = [
{
"id": 1,
"created_at": "2022-09-02T09:00:00Z",
"updated_at": "2022-09-02T09:10:02Z",
"repository": {"full_name": "org/repo"},
},
{
"id": 2,
"created_at": "2022-09-02T09:06:00Z",
"updated_at": "2022-09-02T09:08:00Z",
"repository": {"full_name": "org/repo"},
},
]
workflow_jobs_1 = [
{"id": 1, "completed_at": "2022-09-02T09:02:00Z", "run_id": 1},
{"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1},
{"id": 5, "completed_at": None, "run_id": 1},
]
workflow_jobs_2 = [
{"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2},
{"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2},
]
responses.add(
"GET",
"https://api.github.com/repos/org/repo/actions/runs",
json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs},
)
responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1})
responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2})
state = {}
records = read_incremental(stream, state)
assert state == {"org/repo": {"completed_at": "2022-09-02T09:10:00Z"}}
assert records == [
{"completed_at": "2022-09-02T09:10:00Z", "id": 4, "repository": "org/repo", "run_id": 1},
{"completed_at": "2022-09-02T09:07:00Z", "id": 2, "repository": "org/repo", "run_id": 2},
{"completed_at": "2022-09-02T09:08:00Z", "id": 3, "repository": "org/repo", "run_id": 2},
]
assert len(responses.calls) == 3
workflow_jobs_1[2]["completed_at"] = "2022-09-02T09:12:00Z"
workflow_runs[0]["updated_at"] = "2022-09-02T09:12:01Z"
workflow_runs.append(
{
"id": 3,
"created_at": "2022-09-02T09:14:00Z",
"updated_at": "2022-09-02T09:15:00Z",
"repository": {"full_name": "org/repo"},
}
)
workflow_jobs_3 = [
{"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3},
{"id": 7, "completed_at": None, "run_id": 3},
]
responses.add(
"GET",
"https://api.github.com/repos/org/repo/actions/runs",
json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs},
)
responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1})
responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2})
responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/3/jobs", json={"jobs": workflow_jobs_3})
responses.calls.reset()
records = read_incremental(stream, state)
assert state == {"org/repo": {"completed_at": "2022-09-02T09:15:00Z"}}
assert records == [
{"completed_at": "2022-09-02T09:12:00Z", "id": 5, "repository": "org/repo", "run_id": 1},
{"completed_at": "2022-09-02T09:15:00Z", "id": 6, "repository": "org/repo", "run_id": 3},
]
records = list(read_full_refresh(stream))
assert records == [
{"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1, "repository": "org/repo"},
{"id": 5, "completed_at": "2022-09-02T09:12:00Z", "run_id": 1, "repository": "org/repo"},
{"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2, "repository": "org/repo"},
{"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2, "repository": "org/repo"},
{"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3, "repository": "org/repo"},
]
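
# PullRequestCommentReactions is read via GraphQL with a created_at cursor; the second sync returns only new reactions.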
@responses.activate
def test_stream_pull_request_comment_reactions_read():
repository_args_with_start_date = {
"start_date": "2022-01-01T00:00:00Z",
"page_size_for_large_streams": 2,
"repositories": ["airbytehq/airbyte"],
}
stream = PullRequestCommentReactions(**repository_args_with_start_date)
stream.page_size = 2
f = Path(__file__).parent / "pull_request_comment_reactions.json"
response_objects = json.load(open(f))
def request_callback(request):
return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0)))
responses.add_callback(
responses.POST,
"https://api.github.com/graphql",
callback=request_callback,
content_type="application/json",
)
stream_state = {}
records = read_incremental(stream, stream_state)
records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records]
assert records == [
{"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction1"},
{"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction2"},
{"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction3"},
{"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction4"},
{"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction5"},
{"comment_id": "comment5", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction6"},
{"comment_id": "comment7", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction7"},
{"comment_id": "comment8", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction8"},
]
assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-01T00:00:01Z"}}
records = read_incremental(stream, stream_state)
records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records]
assert records == [
{"comment_id": "comment2", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction9"},
{"comment_id": "comment8", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction10"},
]
assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-02T00:00:01Z"}}
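
# A GraphQL response containing errors is retried max_retries times and then raises.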
@responses.activate
def test_stream_projects_v2_graphql_retry():
repository_args_with_start_date = {
"start_date": "2022-01-01T00:00:00Z",
"page_size_for_large_streams": 20,
"repositories": ["airbytehq/airbyte"],
}
stream = ProjectsV2(**repository_args_with_start_date)
resp = responses.add(responses.POST, "https://api.github.com/graphql", json={"errors": "not found"}, status=200)
with patch.object(stream, "backoff_time", return_value=0.01), pytest.raises(UserDefinedBackoffException):
read_incremental(stream, stream_state={})
assert resp.call_count == stream.max_retries + 1
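
# The rendered ProjectsV2 GraphQL request body must match the stored fixture query.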
def test_stream_projects_v2_graphql_query():
repository_args_with_start_date = {
"start_date": "2022-01-01T00:00:00Z",
"page_size_for_large_streams": 20,
"repositories": ["airbytehq/airbyte"],
}
stream = ProjectsV2(**repository_args_with_start_date)
query = stream.request_body_json(stream_state={}, stream_slice={'repository': 'airbytehq/airbyte'})
f = Path(__file__).parent / "projects_v2_pull_requests_query.json"
expected_query = json.load(open(f))
assert query == expected_query