# # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # import json from http import HTTPStatus from pathlib import Path from unittest.mock import MagicMock, patch import pytest import requests import responses from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException, UserDefinedBackoffException from requests import HTTPError from responses import matchers from source_github import constants from source_github.streams import ( Branches, Collaborators, Comments, CommitCommentReactions, CommitComments, Commits, Deployments, IssueEvents, IssueLabels, IssueMilestones, Organizations, ProjectCards, ProjectColumns, Projects, ProjectsV2, PullRequestCommentReactions, PullRequestCommits, PullRequests, PullRequestStats, Releases, Repositories, RepositoryStats, Reviews, Stargazers, Tags, TeamMembers, TeamMemberships, Teams, Users, WorkflowJobs, WorkflowRuns, ) from source_github.utils import read_full_refresh from .utils import ProjectsResponsesAPI, read_incremental DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80] @responses.activate @patch("time.sleep") def test_internal_server_error_retry(time_mock): args = {"authenticator": None, "repositories": ["airbytehq/airbyte"], "start_date": "start_date", "page_size_for_large_streams": 30} stream = CommitCommentReactions(**args) stream_slice = {"repository": "airbytehq/airbyte", "comment_id": "id"} time_mock.reset_mock() responses.add("GET", "https://api.github.com/repos/airbytehq/airbyte/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR) with pytest.raises(BaseBackoffException): list(stream.read_records(sync_mode="full_refresh", stream_slice=stream_slice)) sleep_delays = [delay[0][0] for delay in time_mock.call_args_list] assert sleep_delays == DEFAULT_BACKOFF_DELAYS @pytest.mark.parametrize( ("http_status", "response_headers", "expected_backoff_time"), [ (HTTPStatus.BAD_GATEWAY, {}, None), (HTTPStatus.INTERNAL_SERVER_ERROR, {}, None), (HTTPStatus.SERVICE_UNAVAILABLE, {}, None), (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, 60), (HTTPStatus.FORBIDDEN, {"Retry-After": "30"}, 60), (HTTPStatus.FORBIDDEN, {"Retry-After": "120"}, 120), (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804454"}, 60.0), (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "1655804724"}, 300.0), ], ) @patch("time.time", return_value=1655804424.0) def test_backoff_time(time_mock, http_status, response_headers, expected_backoff_time): response_mock = MagicMock() response_mock.status_code = http_status response_mock.headers = response_headers args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30} stream = PullRequestCommentReactions(**args) assert stream.backoff_time(response_mock) == expected_backoff_time @pytest.mark.parametrize( ("http_status", "response_headers", "text"), [ (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'), (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""), (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""), (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""), (HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""), (HTTPStatus.BAD_GATEWAY, {}, ""), (HTTPStatus.SERVICE_UNAVAILABLE, {}, ""), ], ) def test_should_retry(http_status, response_headers, text): stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30) response_mock = MagicMock() response_mock.status_code = http_status response_mock.headers = response_headers response_mock.text = text response_mock.json = lambda: json.loads(text) assert stream.should_retry(response_mock) @responses.activate @patch("time.sleep") def test_retry_after(time_mock): first_request = True def request_callback(request): nonlocal first_request if first_request: first_request = False return (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "") return (HTTPStatus.OK, {}, '{"login": "airbytehq"}') responses.add_callback( responses.GET, "https://api.github.com/orgs/airbytehq", callback=request_callback, content_type="application/json", ) stream = Organizations(organizations=["airbytehq"]) list(read_full_refresh(stream)) assert len(responses.calls) == 2 assert responses.calls[0].request.url == "https://api.github.com/orgs/airbytehq?per_page=100" assert responses.calls[1].request.url == "https://api.github.com/orgs/airbytehq?per_page=100" @responses.activate @patch("time.sleep") @patch("time.time", return_value=1655804424.0) def test_graphql_rate_limited(time_mock, sleep_mock): response_objects = [ ( HTTPStatus.OK, {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655804724"}, json.dumps({"errors": [{"type": "RATE_LIMITED"}]}), ), ( HTTPStatus.OK, {"X-RateLimit-Limit": "5000", "X-RateLimit-Resource": "graphql", "X-RateLimit-Reset": "1655808324"}, json.dumps({"data": {"repository": None}}), ), ] responses.add_callback( responses.POST, "https://api.github.com/graphql", callback=lambda r: response_objects.pop(0), content_type="application/json", ) stream = PullRequestStats(repositories=["airbytehq/airbyte"], page_size_for_large_streams=30) records = list(read_full_refresh(stream)) assert records == [] assert len(responses.calls) == 2 assert responses.calls[0].request.url == "https://api.github.com/graphql" assert responses.calls[1].request.url == "https://api.github.com/graphql" assert sum([c[0][0] for c in sleep_mock.call_args_list]) > 300 @responses.activate def test_stream_teams_404(): organization_args = {"organizations": ["org_name"]} stream = Teams(**organization_args) responses.add( "GET", "https://api.github.com/orgs/org_name/teams", status=requests.codes.NOT_FOUND, json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/teams#list-teams"}, ) assert list(read_full_refresh(stream)) == [] assert len(responses.calls) == 1 assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/teams?per_page=100" @responses.activate @patch("time.sleep") def test_stream_teams_502(sleep_mock): organization_args = {"organizations": ["org_name"]} stream = Teams(**organization_args) url = "https://api.github.com/orgs/org_name/teams" responses.add( method="GET", url=url, status=requests.codes.BAD_GATEWAY, json={"message": "Server Error"}, ) assert list(read_full_refresh(stream)) == [] assert len(responses.calls) == 6 # Check whether url is the same for all response.calls assert set(call.request.url for call in responses.calls).symmetric_difference({f"{url}?per_page=100"}) == set() @responses.activate def test_stream_organizations_read(): organization_args = {"organizations": ["org1", "org2"]} stream = Organizations(**organization_args) responses.add("GET", "https://api.github.com/orgs/org1", json={"id": 1}) responses.add("GET", "https://api.github.com/orgs/org2", json={"id": 2}) records = list(read_full_refresh(stream)) assert records == [{"id": 1}, {"id": 2}] @responses.activate def test_stream_teams_read(): organization_args = {"organizations": ["org1", "org2"]} stream = Teams(**organization_args) responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"id": 1}, {"id": 2}]) responses.add("GET", "https://api.github.com/orgs/org2/teams", json=[{"id": 3}]) records = list(read_full_refresh(stream)) assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}] assert len(responses.calls) == 2 assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/teams?per_page=100" assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/teams?per_page=100" @responses.activate def test_stream_users_read(): organization_args = {"organizations": ["org1", "org2"]} stream = Users(**organization_args) responses.add("GET", "https://api.github.com/orgs/org1/members", json=[{"id": 1}, {"id": 2}]) responses.add("GET", "https://api.github.com/orgs/org2/members", json=[{"id": 3}]) records = list(read_full_refresh(stream)) assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}] assert len(responses.calls) == 2 assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/members?per_page=100" assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/members?per_page=100" @responses.activate def test_stream_repositories_404(): organization_args = {"organizations": ["org_name"]} stream = Repositories(**organization_args) responses.add( "GET", "https://api.github.com/orgs/org_name/repos", status=requests.codes.NOT_FOUND, json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/repos#list-organization-repositories"}, ) assert list(read_full_refresh(stream)) == [] assert len(responses.calls) == 1 assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc" @responses.activate def test_stream_repositories_401(caplog): organization_args = {"organizations": ["org_name"], "access_token_type": constants.PERSONAL_ACCESS_TOKEN_TITLE} stream = Repositories(**organization_args) responses.add( "GET", "https://api.github.com/orgs/org_name/repos", status=requests.codes.UNAUTHORIZED, json={"message": "Bad credentials", "documentation_url": "https://docs.github.com/rest"}, ) with pytest.raises(HTTPError): assert list(read_full_refresh(stream)) == [] assert len(responses.calls) == 1 assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc" assert "Personal Access Token renewal is required: Bad credentials" in caplog.messages @responses.activate def test_stream_repositories_read(): organization_args = {"organizations": ["org1", "org2"]} stream = Repositories(**organization_args) updated_at = "2020-01-01T00:00:00Z" responses.add( "GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1, "updated_at": updated_at}, {"id": 2, "updated_at": updated_at}] ) responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3, "updated_at": updated_at}]) records = list(read_full_refresh(stream)) assert records == [ {"id": 1, "organization": "org1", "updated_at": updated_at}, {"id": 2, "organization": "org1", "updated_at": updated_at}, {"id": 3, "organization": "org2", "updated_at": updated_at}, ] assert len(responses.calls) == 2 assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100&sort=updated&direction=desc" assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100&sort=updated&direction=desc" @responses.activate def test_stream_projects_disabled(): repository_args_with_start_date = {"start_date": "start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]} stream = Projects(**repository_args_with_start_date) responses.add( "GET", "https://api.github.com/repos/test_repo/projects", status=requests.codes.GONE, json={"message": "Projects are disabled for this repository", "documentation_url": "https://docs.github.com/v3/projects"}, ) assert list(read_full_refresh(stream)) == [] assert len(responses.calls) == 1 assert responses.calls[0].request.url == "https://api.github.com/repos/test_repo/projects?per_page=100&state=all" @responses.activate def test_stream_pull_requests_incremental_read(): page_size = 2 repository_args_with_start_date = { "repositories": ["organization/repository"], "page_size_for_large_streams": page_size, "start_date": "2022-02-02T10:10:03Z", } stream = PullRequests(**repository_args_with_start_date) data = [ {"id": 1, "updated_at": "2022-02-02T10:10:02Z"}, {"id": 2, "updated_at": "2022-02-02T10:10:04Z"}, {"id": 3, "updated_at": "2022-02-02T10:10:06Z"}, {"id": 4, "updated_at": "2022-02-02T10:10:08Z"}, {"id": 5, "updated_at": "2022-02-02T10:10:10Z"}, {"id": 6, "updated_at": "2022-02-02T10:10:12Z"}, ] api_url = "https://api.github.com/repos/organization/repository/pulls" responses.add( "GET", api_url, json=data[0:2], headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc"}, strict_match=False)], ) responses.add( "GET", api_url, json=data[2:4], match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "asc", "page": "2"}, strict_match=False)], ) responses.add( "GET", api_url, json=data[5:3:-1], headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc"}, strict_match=False)], ) responses.add( "GET", api_url, json=data[3:1:-1], headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": str(page_size), "direction": "desc", "page": "2"}, strict_match=False)], ) stream_state = {} records = read_incremental(stream, stream_state) assert [r["id"] for r in records] == [2, 3, 4] assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:08Z"}} records = read_incremental(stream, stream_state) assert [r["id"] for r in records] == [6, 5] assert stream_state == {"organization/repository": {"updated_at": "2022-02-02T10:10:12Z"}} @responses.activate def test_stream_commits_incremental_read(): repository_args_with_start_date = { "repositories": ["organization/repository"], "page_size_for_large_streams": 100, "start_date": "2022-02-02T10:10:03Z", } default_branches = {"organization/repository": "master"} branches_to_pull = {"organization/repository": ["branch"]} stream = Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches) stream.page_size = 2 data = [ {"sha": 1, "commit": {"author": {"date": "2022-02-02T10:10:02Z"}}}, {"sha": 2, "commit": {"author": {"date": "2022-02-02T10:10:04Z"}}}, {"sha": 3, "commit": {"author": {"date": "2022-02-02T10:10:06Z"}}}, {"sha": 4, "commit": {"author": {"date": "2022-02-02T10:10:08Z"}}}, {"sha": 5, "commit": {"author": {"date": "2022-02-02T10:10:10Z"}}}, {"sha": 6, "commit": {"author": {"date": "2022-02-02T10:10:12Z"}}}, {"sha": 7, "commit": {"author": {"date": "2022-02-02T10:10:14Z"}}}, ] api_url = "https://api.github.com/repos/organization/repository/commits" responses.add( "GET", api_url, json=data[0:3], match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:03Z", "sha": "branch", "per_page": "2"}, strict_match=False)], ) responses.add( "GET", api_url, json=data[3:5], headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2"}, strict_match=False)], ) responses.add( "GET", api_url, json=data[5:7], match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:06Z", "sha": "branch", "per_page": "2", "page": "2"}, strict_match=False)], ) stream_state = {} records = read_incremental(stream, stream_state) assert [r["sha"] for r in records] == [2, 3] assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:06Z"}}} records = read_incremental(stream, stream_state) assert [r["sha"] for r in records] == [4, 5, 6, 7] assert stream_state == {"organization/repository": {"branch": {"created_at": "2022-02-02T10:10:14Z"}}} @responses.activate def test_stream_pull_request_commits(): repository_args = { "repositories": ["organization/repository"], "page_size_for_large_streams": 100, } repository_args_with_start_date = {**repository_args, "start_date": "2022-02-02T10:10:02Z"} stream = PullRequestCommits(PullRequests(**repository_args_with_start_date), **repository_args) responses.add( "GET", "https://api.github.com/repos/organization/repository/pulls", json=[ {"id": 1, "updated_at": "2022-02-02T10:10:02Z", "number": 1}, {"id": 2, "updated_at": "2022-02-02T10:10:04Z", "number": 2}, {"id": 3, "updated_at": "2022-02-02T10:10:06Z", "number": 3}, ], ) responses.add( "GET", "https://api.github.com/repos/organization/repository/pulls/2/commits", json=[{"sha": 1}, {"sha": 2}], ) responses.add( "GET", "https://api.github.com/repos/organization/repository/pulls/3/commits", json=[{"sha": 3}, {"sha": 4}], ) records = list(read_full_refresh(stream)) assert records == [ {"sha": 1, "repository": "organization/repository", "pull_number": 2}, {"sha": 2, "repository": "organization/repository", "pull_number": 2}, {"sha": 3, "repository": "organization/repository", "pull_number": 3}, {"sha": 4, "repository": "organization/repository", "pull_number": 3}, ] @responses.activate def test_stream_project_columns(): repository_args_with_start_date = { "repositories": ["organization/repository"], "page_size_for_large_streams": 100, "start_date": "2022-02-01T00:00:00Z", } data = [ { "updated_at": "2022-01-01T10:00:00Z", }, { "updated_at": "2022-03-01T10:00:00Z", "columns": [ {"updated_at": "2022-01-01T10:00:00Z"}, {"updated_at": "2022-03-01T09:00:00Z"}, {"updated_at": "2022-03-01T10:00:00Z"}, ], }, { "updated_at": "2022-05-01T10:00:00Z", "columns": [ {"updated_at": "2022-01-01T10:00:00Z"}, {"updated_at": "2022-05-01T10:00:00Z"}, ], }, ] ProjectsResponsesAPI.register(data) projects_stream = Projects(**repository_args_with_start_date) stream = ProjectColumns(projects_stream, **repository_args_with_start_date) stream_state = {} records = read_incremental(stream, stream_state=stream_state) assert records == [ {"id": 22, "name": "column_22", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T09:00:00Z"}, {"id": 23, "name": "column_23", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-03-01T10:00:00Z"}, {"id": 32, "name": "column_32", "project_id": 3, "repository": "organization/repository", "updated_at": "2022-05-01T10:00:00Z"}, ] assert stream_state == { "organization/repository": {"2": {"updated_at": "2022-03-01T10:00:00Z"}, "3": {"updated_at": "2022-05-01T10:00:00Z"}} } data = [ {"updated_at": "2022-01-01T10:00:00Z"}, { "updated_at": "2022-04-01T10:00:00Z", "columns": [ {"updated_at": "2022-01-01T10:00:00Z"}, {"updated_at": "2022-03-01T09:00:00Z"}, {"updated_at": "2022-03-01T10:00:00Z"}, {"updated_at": "2022-04-01T10:00:00Z"}, ], }, { "updated_at": "2022-05-01T10:00:00Z", "columns": [ {"updated_at": "2022-01-01T10:00:00Z"}, {"updated_at": "2022-05-01T10:00:00Z"}, ], }, { "updated_at": "2022-06-01T10:00:00Z", "columns": [{"updated_at": "2022-06-01T10:00:00Z"}], }, ] ProjectsResponsesAPI.register(data) projects_stream._session.cache.clear() stream._session.cache.clear() records = read_incremental(stream, stream_state=stream_state) assert records == [ {"id": 24, "name": "column_24", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T10:00:00Z"}, {"id": 41, "name": "column_41", "project_id": 4, "repository": "organization/repository", "updated_at": "2022-06-01T10:00:00Z"}, ] assert stream_state == { "organization/repository": { "2": {"updated_at": "2022-04-01T10:00:00Z"}, "3": {"updated_at": "2022-05-01T10:00:00Z"}, "4": {"updated_at": "2022-06-01T10:00:00Z"}, } } @responses.activate def test_stream_project_cards(): repository_args_with_start_date = { "repositories": ["organization/repository"], "page_size_for_large_streams": 100, "start_date": "2022-03-01T00:00:00Z", } projects_stream = Projects(**repository_args_with_start_date) project_columns_stream = ProjectColumns(projects_stream, **repository_args_with_start_date) stream = ProjectCards(project_columns_stream, **repository_args_with_start_date) data = [ { "updated_at": "2022-01-01T00:00:00Z", }, { "updated_at": "2022-06-01T00:00:00Z", "columns": [ { "updated_at": "2022-04-01T00:00:00Z", "cards": [ {"updated_at": "2022-03-01T00:00:00Z"}, {"updated_at": "2022-04-01T00:00:00Z"}, ], }, {"updated_at": "2022-05-01T09:00:00Z"}, { "updated_at": "2022-06-01T00:00:00Z", "cards": [ {"updated_at": "2022-05-01T00:00:00Z"}, {"updated_at": "2022-06-01T00:00:00Z"}, ], }, ], }, { "updated_at": "2022-05-01T00:00:00Z", "columns": [ {"updated_at": "2022-01-01T00:00:00Z"}, { "updated_at": "2022-05-01T00:00:00Z", "cards": [ {"updated_at": "2022-02-01T00:00:00Z"}, {"updated_at": "2022-05-01T00:00:00Z"}, ], }, ], }, ] ProjectsResponsesAPI.register(data) stream_state = {} projects_stream._session.cache.clear() project_columns_stream._session.cache.clear() records = read_incremental(stream, stream_state=stream_state) assert records == [ { "column_id": 21, "id": 212, "name": "card_212", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-04-01T00:00:00Z", }, { "column_id": 23, "id": 231, "name": "card_231", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-05-01T00:00:00Z", }, { "column_id": 23, "id": 232, "name": "card_232", "project_id": 2, "repository": "organization/repository", "updated_at": "2022-06-01T00:00:00Z", }, { "column_id": 32, "id": 322, "name": "card_322", "project_id": 3, "repository": "organization/repository", "updated_at": "2022-05-01T00:00:00Z", }, ] @responses.activate def test_stream_comments(): repository_args_with_start_date = { "repositories": ["organization/repository", "airbytehq/airbyte"], "page_size_for_large_streams": 2, "start_date": "2022-02-02T10:10:01Z", } stream = Comments(**repository_args_with_start_date) data = [ {"id": 1, "updated_at": "2022-02-02T10:10:02Z"}, {"id": 2, "updated_at": "2022-02-02T10:10:04Z"}, {"id": 3, "updated_at": "2022-02-02T10:12:06Z"}, {"id": 4, "updated_at": "2022-02-02T10:12:08Z"}, {"id": 5, "updated_at": "2022-02-02T10:12:10Z"}, {"id": 6, "updated_at": "2022-02-02T10:12:12Z"}, ] api_url = "https://api.github.com/repos/organization/repository/issues/comments" responses.add( "GET", api_url, json=data[0:2], match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[1:3], headers={ "Link": '; rel="next"' }, match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[3:5], headers={ "Link": '; rel="next"' }, match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "2", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[5:], match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:04Z", "page": "3", "per_page": "2"})], ) data = [ {"id": 1, "updated_at": "2022-02-02T10:11:02Z"}, {"id": 2, "updated_at": "2022-02-02T10:11:04Z"}, {"id": 3, "updated_at": "2022-02-02T10:13:06Z"}, {"id": 4, "updated_at": "2022-02-02T10:13:08Z"}, {"id": 5, "updated_at": "2022-02-02T10:13:10Z"}, {"id": 6, "updated_at": "2022-02-02T10:13:12Z"}, ] api_url = "https://api.github.com/repos/airbytehq/airbyte/issues/comments" responses.add( "GET", api_url, json=data[0:2], match=[matchers.query_param_matcher({"since": "2022-02-02T10:10:01Z", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[1:3], headers={ "Link": '; rel="next"' }, match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[3:5], headers={ "Link": '; rel="next"' }, match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "2", "per_page": "2"})], ) responses.add( "GET", api_url, json=data[5:], match=[matchers.query_param_matcher({"since": "2022-02-02T10:11:04Z", "page": "3", "per_page": "2"})], ) stream_state = {} records = read_incremental(stream, stream_state) assert records == [ {"id": 1, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:02Z"}, {"id": 2, "repository": "organization/repository", "updated_at": "2022-02-02T10:10:04Z"}, {"id": 1, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:02Z"}, {"id": 2, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:11:04Z"}, ] assert stream_state == { "airbytehq/airbyte": {"updated_at": "2022-02-02T10:11:04Z"}, "organization/repository": {"updated_at": "2022-02-02T10:10:04Z"}, } records = read_incremental(stream, stream_state) assert records == [ {"id": 3, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:06Z"}, {"id": 4, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:08Z"}, {"id": 5, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:10Z"}, {"id": 6, "repository": "organization/repository", "updated_at": "2022-02-02T10:12:12Z"}, {"id": 3, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:06Z"}, {"id": 4, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:08Z"}, {"id": 5, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:10Z"}, {"id": 6, "repository": "airbytehq/airbyte", "updated_at": "2022-02-02T10:13:12Z"}, ] assert stream_state == { "airbytehq/airbyte": {"updated_at": "2022-02-02T10:13:12Z"}, "organization/repository": {"updated_at": "2022-02-02T10:12:12Z"}, } @responses.activate def test_streams_read_full_refresh(): repository_args = { "repositories": ["organization/repository"], "page_size_for_large_streams": 100, } repository_args_with_start_date = {**repository_args, "start_date": "2022-02-01T00:00:00Z"} def get_json_response(cursor_field): cursor_field = cursor_field or "updated_at" return [ {"id": 1, cursor_field: "2022-02-01T00:00:00Z"}, {"id": 2, cursor_field: "2022-02-02T00:00:00Z"}, ] def get_records(cursor_field): cursor_field = cursor_field or "updated_at" return [ {"id": 1, cursor_field: "2022-02-01T00:00:00Z", "repository": "organization/repository"}, {"id": 2, cursor_field: "2022-02-02T00:00:00Z", "repository": "organization/repository"}, ] for cls, url in [ (Releases, "https://api.github.com/repos/organization/repository/releases"), (IssueEvents, "https://api.github.com/repos/organization/repository/issues/events"), (IssueMilestones, "https://api.github.com/repos/organization/repository/milestones"), (CommitComments, "https://api.github.com/repos/organization/repository/comments"), (Deployments, "https://api.github.com/repos/organization/repository/deployments"), ]: stream = cls(**repository_args_with_start_date) responses.add("GET", url, json=get_json_response(stream.cursor_field)) records = list(read_full_refresh(stream)) assert records == get_records(stream.cursor_field)[1:2] for cls, url in [ (Tags, "https://api.github.com/repos/organization/repository/tags"), (IssueLabels, "https://api.github.com/repos/organization/repository/labels"), (Collaborators, "https://api.github.com/repos/organization/repository/collaborators"), (Branches, "https://api.github.com/repos/organization/repository/branches"), ]: stream = cls(**repository_args) responses.add("GET", url, json=get_json_response(stream.cursor_field)) records = list(read_full_refresh(stream)) assert records == get_records(stream.cursor_field) responses.add( "GET", "https://api.github.com/repos/organization/repository/stargazers", json=[ {"starred_at": "2022-02-01T00:00:00Z", "user": {"id": 1}}, {"starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}}, ], ) stream = Stargazers(**repository_args_with_start_date) records = list(read_full_refresh(stream)) assert records == [{"repository": "organization/repository", "starred_at": "2022-02-02T00:00:00Z", "user": {"id": 2}, "user_id": 2}] @responses.activate def test_stream_reviews_incremental_read(): repository_args_with_start_date = { "start_date": "2000-01-01T00:00:00Z", "page_size_for_large_streams": 30, "repositories": ["airbytehq/airbyte"], } stream = Reviews(**repository_args_with_start_date) stream.page_size = 2 f = Path(__file__).parent / "graphql_reviews_responses.json" response_objects = json.load(open(f)) def request_callback(request): return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0))) responses.add_callback( responses.POST, "https://api.github.com/graphql", callback=request_callback, content_type="application/json", ) stream_state = {} records = read_incremental(stream, stream_state) assert [r["id"] for r in records] == [1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008] assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:01Z"}} assert len(responses.calls) == 4 responses.calls.reset() records = read_incremental(stream, stream_state) assert [r["id"] for r in records] == [1000, 1007, 1009] assert stream_state == {"airbytehq/airbyte": {"updated_at": "2000-01-01T00:00:02Z"}} assert len(responses.calls) == 4 @responses.activate def test_stream_team_members_full_refresh(caplog): organization_args = {"organizations": ["org1"]} repository_args = {"repositories": [], "page_size_for_large_streams": 100} responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"slug": "team1"}, {"slug": "team2"}]) responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/members", json=[{"login": "login1"}, {"login": "login2"}]) responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login1", json={"username": "login1"}) responses.add("GET", "https://api.github.com/orgs/org1/teams/team1/memberships/login2", json={"username": "login2"}) responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/members", json=[{"login": "login2"}, {"login": "login3"}]) responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login2", json={"username": "login2"}) responses.add("GET", "https://api.github.com/orgs/org1/teams/team2/memberships/login3", status=requests.codes.NOT_FOUND) teams_stream = Teams(**organization_args) stream = TeamMembers(parent=teams_stream, **repository_args) teams_stream._session.cache.clear() records = list(read_full_refresh(stream)) assert records == [ {"login": "login1", "organization": "org1", "team_slug": "team1"}, {"login": "login2", "organization": "org1", "team_slug": "team1"}, {"login": "login2", "organization": "org1", "team_slug": "team2"}, {"login": "login3", "organization": "org1", "team_slug": "team2"} ] stream = TeamMemberships(parent=stream, **repository_args) records = list(read_full_refresh(stream)) assert records == [ {"username": "login1", "organization": "org1", "team_slug": "team1"}, {"username": "login2", "organization": "org1", "team_slug": "team1"}, {"username": "login2", "organization": "org1", "team_slug": "team2"}, ] expected_message = "Syncing `TeamMemberships` stream for organization `org1`, team `team2` and user `login3` isn't available: User has no team membership. Skipping..." assert expected_message in caplog.messages @responses.activate def test_stream_commit_comment_reactions_incremental_read(): repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100} stream = CommitCommentReactions(**repository_args) responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments", json=[ {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"}, {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"}, ], ) responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments/55538825/reactions", json=[ {"id": 154935429, "created_at": "2022-01-01T15:00:00Z"}, {"id": 154935430, "created_at": "2022-01-01T16:00:00Z"}, ], ) responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions", json=[{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}], ) stream_state = {} records = read_incremental(stream, stream_state) assert stream_state == { "airbytehq/integration-test": { "55538825": {"created_at": "2022-01-01T16:00:00Z"}, "55538826": {"created_at": "2022-01-01T17:00:00Z"}, } } assert records == [ {"id": 154935429, "comment_id": 55538825, "created_at": "2022-01-01T15:00:00Z", "repository": "airbytehq/integration-test"}, {"id": 154935430, "comment_id": 55538825, "created_at": "2022-01-01T16:00:00Z", "repository": "airbytehq/integration-test"}, {"id": 154935431, "comment_id": 55538826, "created_at": "2022-01-01T17:00:00Z", "repository": "airbytehq/integration-test"}, ] responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments", json=[ {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"}, {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"}, {"id": 55538827, "updated_at": "2022-02-01T15:00:00Z"}, ], ) responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions", json=[ {"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}, {"id": 154935432, "created_at": "2022-02-01T16:00:00Z"}, ], ) responses.add( "GET", "https://api.github.com/repos/airbytehq/integration-test/comments/55538827/reactions", json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}], ) stream._parent_stream._session.cache.clear() records = read_incremental(stream, stream_state) assert records == [ {"id": 154935432, "comment_id": 55538826, "created_at": "2022-02-01T16:00:00Z", "repository": "airbytehq/integration-test"}, {"id": 154935433, "comment_id": 55538827, "created_at": "2022-02-01T17:00:00Z", "repository": "airbytehq/integration-test"}, ] @responses.activate def test_stream_workflow_runs_read_incremental(monkeypatch): repository_args_with_start_date = { "repositories": ["org/repos"], "page_size_for_large_streams": 30, "start_date": "2022-01-01T00:00:00Z", } monkeypatch.setattr(constants, "DEFAULT_PAGE_SIZE", 1) stream = WorkflowRuns(**repository_args_with_start_date) data = [ {"id": 4, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z", "repository": {"full_name": "org/repos"}}, {"id": 3, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z", "repository": {"full_name": "org/repos"}}, {"id": 2, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z", "repository": {"full_name": "org/repos"}}, {"id": 1, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z", "repository": {"full_name": "org/repos"}}, ] responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[0:1]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[1:2]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[2:3]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[3:4]}, match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)], ) state = {} records = read_incremental(stream, state) assert state == {"org/repos": {"updated_at": "2022-02-05T00:00:00Z"}} assert records == [ {"id": 4, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-05T00:00:00Z", "updated_at": "2022-02-05T00:00:00Z"}, {"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-01-15T00:00:00Z"}, {"id": 2, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-03T00:00:00Z", "updated_at": "2022-01-03T00:00:00Z"}, {"id": 1, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-02T00:00:00Z", "updated_at": "2022-01-02T00:00:00Z"}, ] assert len(responses.calls) == 4 data.insert( 0, { "id": 5, "created_at": "2022-02-07T00:00:00Z", "updated_at": "2022-02-07T00:00:00Z", "repository": {"full_name": "org/repos"}, }, ) data[2]["updated_at"] = "2022-02-08T00:00:00Z" responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[0:1]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[1:2]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1", "page": "2"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[2:3]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1", "page": "3"}, strict_match=True)], ) responses.add( "GET", "https://api.github.com/repos/org/repos/actions/runs", json={"total_count": len(data), "workflow_runs": data[3:4]}, headers={"Link": '; rel="next"'}, match=[matchers.query_param_matcher({"per_page": "1", "page": "4"}, strict_match=True)], ) responses.calls.reset() records = read_incremental(stream, state) assert state == {"org/repos": {"updated_at": "2022-02-08T00:00:00Z"}} assert records == [ {"id": 5, "repository": {"full_name": "org/repos"}, "created_at": "2022-02-07T00:00:00Z", "updated_at": "2022-02-07T00:00:00Z"}, {"id": 3, "repository": {"full_name": "org/repos"}, "created_at": "2022-01-15T00:00:00Z", "updated_at": "2022-02-08T00:00:00Z"}, ] assert len(responses.calls) == 4 @responses.activate def test_stream_workflow_jobs_read(): repository_args = { "repositories": ["org/repo"], "page_size_for_large_streams": 100, } repository_args_with_start_date = {**repository_args, "start_date": "2022-09-02T09:05:00Z"} workflow_runs_stream = WorkflowRuns(**repository_args_with_start_date) stream = WorkflowJobs(workflow_runs_stream, **repository_args_with_start_date) workflow_runs = [ { "id": 1, "created_at": "2022-09-02T09:00:00Z", "updated_at": "2022-09-02T09:10:02Z", "repository": {"full_name": "org/repo"}, }, { "id": 2, "created_at": "2022-09-02T09:06:00Z", "updated_at": "2022-09-02T09:08:00Z", "repository": {"full_name": "org/repo"}, }, ] workflow_jobs_1 = [ {"id": 1, "completed_at": "2022-09-02T09:02:00Z", "run_id": 1}, {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1}, {"id": 5, "completed_at": None, "run_id": 1}, ] workflow_jobs_2 = [ {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2}, {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2}, ] responses.add( "GET", "https://api.github.com/repos/org/repo/actions/runs", json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs}, ) responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1}) responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2}) state = {} records = read_incremental(stream, state) assert state == {"org/repo": {"completed_at": "2022-09-02T09:10:00Z"}} assert records == [ {"completed_at": "2022-09-02T09:10:00Z", "id": 4, "repository": "org/repo", "run_id": 1}, {"completed_at": "2022-09-02T09:07:00Z", "id": 2, "repository": "org/repo", "run_id": 2}, {"completed_at": "2022-09-02T09:08:00Z", "id": 3, "repository": "org/repo", "run_id": 2}, ] assert len(responses.calls) == 3 workflow_jobs_1[2]["completed_at"] = "2022-09-02T09:12:00Z" workflow_runs[0]["updated_at"] = "2022-09-02T09:12:01Z" workflow_runs.append( { "id": 3, "created_at": "2022-09-02T09:14:00Z", "updated_at": "2022-09-02T09:15:00Z", "repository": {"full_name": "org/repo"}, } ) workflow_jobs_3 = [ {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3}, {"id": 7, "completed_at": None, "run_id": 3}, ] responses.add( "GET", "https://api.github.com/repos/org/repo/actions/runs", json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs}, ) responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1}) responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2}) responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/3/jobs", json={"jobs": workflow_jobs_3}) responses.calls.reset() records = read_incremental(stream, state) assert state == {"org/repo": {"completed_at": "2022-09-02T09:15:00Z"}} assert records == [ {"completed_at": "2022-09-02T09:12:00Z", "id": 5, "repository": "org/repo", "run_id": 1}, {"completed_at": "2022-09-02T09:15:00Z", "id": 6, "repository": "org/repo", "run_id": 3}, ] records = list(read_full_refresh(stream)) assert records == [ {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1, "repository": "org/repo"}, {"id": 5, "completed_at": "2022-09-02T09:12:00Z", "run_id": 1, "repository": "org/repo"}, {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2, "repository": "org/repo"}, {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2, "repository": "org/repo"}, {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3, "repository": "org/repo"}, ] @responses.activate def test_stream_pull_request_comment_reactions_read(): repository_args_with_start_date = { "start_date": "2022-01-01T00:00:00Z", "page_size_for_large_streams": 2, "repositories": ["airbytehq/airbyte"], } stream = PullRequestCommentReactions(**repository_args_with_start_date) stream.page_size = 2 f = Path(__file__).parent / "pull_request_comment_reactions.json" response_objects = json.load(open(f)) def request_callback(request): return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0))) responses.add_callback( responses.POST, "https://api.github.com/graphql", callback=request_callback, content_type="application/json", ) stream_state = {} records = read_incremental(stream, stream_state) records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records] assert records == [ {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction1"}, {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction2"}, {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction3"}, {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction4"}, {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction5"}, {"comment_id": "comment5", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction6"}, {"comment_id": "comment7", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction7"}, {"comment_id": "comment8", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction8"}, ] assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-01T00:00:01Z"}} records = read_incremental(stream, stream_state) records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records] assert records == [ {"comment_id": "comment2", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction9"}, {"comment_id": "comment8", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction10"}, ] assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-02T00:00:01Z"}} @responses.activate def test_stream_projects_v2_graphql_retry(): repository_args_with_start_date = { "start_date": "2022-01-01T00:00:00Z", "page_size_for_large_streams": 20, "repositories": ["airbytehq/airbyte"], } stream = ProjectsV2(**repository_args_with_start_date) resp = responses.add(responses.POST, "https://api.github.com/graphql", json={"errors": "not found"}, status=200, ) with patch.object(stream, "backoff_time", return_value=0.01), pytest.raises(UserDefinedBackoffException): read_incremental(stream, stream_state={}) assert resp.call_count == stream.max_retries + 1 def test_stream_projects_v2_graphql_query(): repository_args_with_start_date = { "start_date": "2022-01-01T00:00:00Z", "page_size_for_large_streams": 20, "repositories": ["airbytehq/airbyte"], } stream = ProjectsV2(**repository_args_with_start_date) query = stream.request_body_json(stream_state={}, stream_slice={'repository': 'airbytehq/airbyte'}) f = Path(__file__).parent / "projects_v2_pull_requests_query.json" expected_query = json.load(open(f)) assert query == expected_query