airbyte/airbyte-integrations/connectors/source-github/unit_tests/integration/test_assignees.py

# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

import json
from unittest import TestCase

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.entrypoint_wrapper import read
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
from airbyte_cdk.test.mock_http.response_builder import find_template
from airbyte_cdk.test.state_builder import StateBuilder
from source_github import SourceGithub

from .config import ConfigBuilder
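
# Three mocked repositories, so the assignees stream produces one partition (and one per-partition state entry) per repository.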
_CONFIG = ConfigBuilder().with_repositories(["airbytehq/mock-test-0", "airbytehq/mock-test-1", "airbytehq/mock-test-2"]).build()


def _create_catalog(sync_mode: SyncMode = SyncMode.full_refresh):
    return CatalogBuilder().with_stream(name="assignees", sync_mode=sync_mode).build()


class AssigneesTest(TestCase):
    def setUp(self) -> None:
        """Base setup for all tests. Add responses for:
        1. the rate limit checker
        2. the configured repositories (whose responses also supply the default branch)
        """
        self.r_mock = HttpMocker()
        self.r_mock.__enter__()
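        # Generous core and GraphQL rate limits so the source's rate limit check passes without throttling.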
        self.r_mock.get(
            HttpRequest(
                url="https://api.github.com/rate_limit",
                query_params={},
                headers={
                    "Accept": "application/vnd.github+json",
                    "X-GitHub-Api-Version": "2022-11-28",
                    "Authorization": "token GITHUB_TEST_TOKEN",
                },
            ),
            HttpResponse(
                json.dumps(
                    {
                        "resources": {
                            "core": {"limit": 5000, "used": 0, "remaining": 5000, "reset": 5070908800},
                            "graphql": {"limit": 5000, "used": 0, "remaining": 5000, "reset": 5070908800},
                        }
                    }
                ),
                200,
            ),
        )
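        # One repository-metadata response per configured repository; the connector reads the full name and default branch from these.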
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[0]}",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps({"full_name": "airbytehq/mock-test-0", "default_branch": "master"}), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[1]}",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps({"full_name": "airbytehq/mock-test-1", "default_branch": "master"}), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[2]}",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps({"full_name": "airbytehq/mock-test-2", "default_branch": "master"}), 200),
        )

    def tearDown(self):
        """Stops and resets the HttpMocker instance."""
        self.r_mock.__exit__(None, None, None)

    def test_read_full_refresh_emits_per_partition_state(self):
        """Ensure http integration and per-partition state is emitted correctly"""
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[0]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[1]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[2]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
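        # Expected terminal per-partition state once a parent partition completes a full refresh sync.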
        per_partition_state_0 = {"partition": {"repository": "airbytehq/mock-test-0"}, "cursor": {"__ab_full_refresh_sync_complete": True}}
        per_partition_state_1 = {"partition": {"repository": "airbytehq/mock-test-1"}, "cursor": {"__ab_full_refresh_sync_complete": True}}
        per_partition_state_2 = {"partition": {"repository": "airbytehq/mock-test-2"}, "cursor": {"__ab_full_refresh_sync_complete": True}}

        source = SourceGithub()
        actual_messages = read(source, config=_CONFIG, catalog=_create_catalog())
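        # The shared template yields two records per repository, so three repositories produce six records in total.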
        assert len(actual_messages.records) == 6

        # Validate that the per-partition state grows each time another parent partition finishes syncing,
        # ending with the final state of each parent record
        assert len(actual_messages.state_messages) == 3
        actual_state_after_first_partition = actual_messages.state_messages[0].state.stream.stream_state.model_dump()
        assert len(actual_state_after_first_partition.get("states")) == 1
        actual_state_after_second_partition = actual_messages.state_messages[1].state.stream.stream_state.model_dump()
        assert len(actual_state_after_second_partition.get("states")) == 2
        actual_state_after_third_partition = actual_messages.state_messages[2].state.stream.stream_state.model_dump()
        assert len(actual_state_after_third_partition.get("states")) == 3

        # Validate that the final set of per-partition states includes the terminal value for each successful parent
        final_list_of_per_partition_state = actual_state_after_third_partition.get("states")
        assert per_partition_state_0 in final_list_of_per_partition_state
        assert per_partition_state_1 in final_list_of_per_partition_state
        assert per_partition_state_2 in final_list_of_per_partition_state

    def test_read_full_refresh_with_incoming_state_skips_synced_partitions(self):
        """Ensure that incoming resumable full refresh (RFR) state skips parent records that were already synced on a prior attempt"""
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[0]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[1]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
        self.r_mock.get(
            HttpRequest(
                url=f"https://api.github.com/repos/{_CONFIG.get('repositories')[2]}/assignees",
                query_params={"per_page": 100},
            ),
            HttpResponse(json.dumps(find_template("assignees", __file__)), 200),
        )
        per_partition_state_0 = {"partition": {"repository": "airbytehq/mock-test-0"}, "cursor": {"__ab_full_refresh_sync_complete": True}}
        per_partition_state_1 = {"partition": {"repository": "airbytehq/mock-test-1"}, "cursor": {"__ab_full_refresh_sync_complete": True}}
        per_partition_state_2 = {"partition": {"repository": "airbytehq/mock-test-2"}, "cursor": {"__ab_full_refresh_sync_complete": True}}
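        # The prior attempt already completed airbytehq/mock-test-0 and airbytehq/mock-test-1, leaving only airbytehq/mock-test-2 to sync.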
        incoming_state = StateBuilder().with_stream_state("assignees", {
            "states": [
                {"partition": {"repository": "airbytehq/mock-test-0"}, "cursor": {"__ab_full_refresh_sync_complete": True}},
                {"partition": {"repository": "airbytehq/mock-test-1"}, "cursor": {"__ab_full_refresh_sync_complete": True}},
            ]
        }).build()

        source = SourceGithub()
        actual_messages = read(source, config=_CONFIG, catalog=_create_catalog(), state=incoming_state)
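        # Only the remaining repository is read, so just the two records from its template response are emitted.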
        assert len(actual_messages.records) == 2

        # There should only be one state message since the first two parents were already successfully synced
        assert len(actual_messages.state_messages) == 1
        final_list_of_per_partition_state = actual_messages.state_messages[0].state.stream.stream_state.model_dump().get("states")
        assert per_partition_state_0 in final_list_of_per_partition_state
        assert per_partition_state_1 in final_list_of_per_partition_state
        assert per_partition_state_2 in final_list_of_per_partition_state