1
0
Mirror of repository, synced 2025-12-23 21:03:15 -05:00

🐛 Source Github: add incremental for repositories, workflows, pull_request_comment_reactions, issue_reactions, issue_comment_reactions, commit_comment_reactions (#12294)

Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
This commit is contained in:
Serhii Chvaliuk
2022-05-13 21:18:24 +03:00
committed by GitHub
parent e49faedaed
commit 8c394b3734
16 changed files with 300 additions and 70 deletions

View File

@@ -295,7 +295,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.2.29
dockerImageTag: 0.2.30
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api

View File

@@ -2503,7 +2503,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.2.29"
- dockerImage: "airbyte/source-github:0.2.30"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:

View File

@@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
LABEL io.airbyte.version=0.2.29
LABEL io.airbyte.version=0.2.30
LABEL io.airbyte.name=airbyte/source-github

View File

@@ -21,24 +21,30 @@ tests:
future_state_path: "integration_tests/abnormal_state.json"
cursor_paths:
comments: ["airbytehq/integration-test", "updated_at"]
commit_comment_reactions: ["airbytehq/integration-test", "55538825", "created_at"]
commit_comments: ["airbytehq/integration-test", "updated_at"]
commits: ["airbytehq/integration-test", "master", "created_at"]
deployments: ["airbytehq/integration-test", "updated_at"]
events: ["airbytehq/integration-test", "created_at"]
issue_comment_reactions: ["airbytehq/integration-test", "907296275", "created_at"]
issue_events: ["airbytehq/integration-test", "created_at"]
issue_milestones: ["airbytehq/integration-test", "updated_at"]
issue_reactions: ["airbytehq/integration-test", "11", "created_at"]
issues: ["airbytehq/integration-test", "updated_at"]
project_cards:
["airbytehq/integration-test", "13167124", "17807006", "updated_at"]
project_columns:
["airbytehq/integration-test", "13167124", "updated_at"]
projects: ["airbytehq/integration-test", "updated_at"]
pull_request_comment_reactions: ["airbytehq/integration-test", "699253726", "created_at"]
pull_request_stats: ["airbytehq/integration-test", "updated_at"]
pull_requests: ["airbytehq/integration-test", "updated_at"]
releases: ["airbytehq/integration-test", "created_at"]
repositories: ["airbytehq", "updated_at"]
review_comments: ["airbytehq/integration-test", "updated_at"]
reviews: ["airbytehq/integration-test", "pull_request_updated_at"]
stargazers: ["airbytehq/integration-test", "starred_at"]
workflows: ["airbytehq/integration-test", "updated_at"]
full_refresh:
- config_path: "secrets/config.json"
configured_catalog_path: "integration_tests/configured_catalog.json"

View File

@@ -4,6 +4,16 @@
"updated_at": "2121-06-30T10:22:10Z"
}
},
"commit_comment_reactions": {
"airbytehq/integration-test": {
"55538825": {
"created_at": "2121-12-31T23:59:59Z"
},
"55538840": {
"created_at": "2121-12-31T23:59:59Z"
}
}
},
"commit_comments": {
"airbytehq/integration-test": {
"updated_at": "2121-04-30T20:36:17Z"
@@ -24,6 +34,13 @@
"created_at": "2121-06-29T03:44:45Z"
}
},
"issue_comment_reactions": {
"airbytehq/integration-test": {
"907296275": {
"created_at": "2121-12-31T23:59:59Z"
}
}
},
"issue_events": {
"airbytehq/integration-test": {
"created_at": "2121-06-29T01:49:42Z"
@@ -34,6 +51,13 @@
"updated_at": "2121-06-25T22:28:33Z"
}
},
"issue_reactions": {
"airbytehq/integration-test": {
"11": {
"created_at": "2121-12-31T23:59:59Z"
}
}
},
"issues": {
"airbytehq/integration-test": {
"updated_at": "2121-06-30T06:44:42Z"
@@ -63,6 +87,13 @@
"updated_at": "2121-06-28T17:24:51Z"
}
},
"pull_request_comment_reactions": {
"airbytehq/integration-test": {
"699253726": {
"created_at": "2121-12-31T23:59:59Z"
}
}
},
"pull_request_stats": {
"airbytehq/integration-test": {
"updated_at": "2121-06-29T02:04:57Z"
@@ -78,6 +109,11 @@
"created_at": "2121-06-23T23:57:07Z"
}
},
"repositories": {
"airbytehq": {
"updated_at": "2121-12-31T23:59:59Z"
}
},
"review_comments": {
"airbytehq/integration-test": {
"updated_at": "2121-06-23T23:57:07Z"
@@ -92,5 +128,10 @@
"airbytehq/integration-test": {
"starred_at": "2121-06-29T02:04:57Z"
}
},
"workflows": {
"airbytehq/integration-test": {
"updated_at": "2121-12-31T23:59:59Z"
}
}
}

View File

@@ -47,10 +47,10 @@
"stream": {
"name": "commit_comment_reactions",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
@@ -109,10 +109,10 @@
"stream": {
"name": "issue_comment_reactions",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
@@ -155,10 +155,10 @@
"stream": {
"name": "issue_reactions",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
@@ -227,10 +227,10 @@
"stream": {
"name": "pull_request_comment_reactions",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
@@ -286,10 +286,10 @@
"stream": {
"name": "repositories",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
@@ -365,10 +365,10 @@
"stream": {
"name": "workflows",
"json_schema": {},
"supported_sync_modes": ["full_refresh"],
"supported_sync_modes": ["full_refresh", "incremental"],
"source_defined_primary_key": [["id"]]
},
"sync_mode": "full_refresh",
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{

View File

@@ -8,6 +8,7 @@ from setuptools import find_packages, setup
MAIN_REQUIREMENTS = [
"airbyte-cdk~=0.1.33",
"vcrpy==4.1.1",
"pendulum~=2.1.2",
]
TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "responses~=0.19.0"]

View File

@@ -1,4 +1,28 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "reaction.json"
"type": ["null", "object"],
"properties": {
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"content": {
"type": ["null", "string"]
},
"created_at": {
"type": "string",
"format": "date-time"
},
"user": {
"$ref": "user.json"
},
"repository": {
"type": "string"
},
"issue_number": {
"type": "integer"
}
}
}

View File

@@ -244,7 +244,7 @@
"format": "date-time"
},
"updated_at": {
"type": ["null", "string"],
"type": "string",
"format": "date-time"
},
"permissions": {

View File

@@ -11,14 +11,17 @@
"type": ["null", "string"]
},
"created_at": {
"type": ["null", "string"],
"type": "string",
"format": "date-time"
},
"user": {
"$ref": "user.json"
},
"repository": {
"type": ["null", "string"]
"type": "string"
},
"comment_id": {
"type": "integer"
}
}
}

View File

@@ -18,10 +18,12 @@
"type": ["null", "string"]
},
"created_at": {
"type": ["null", "string"]
"type": ["null", "string"],
"format": "date-time"
},
"updated_at": {
"type": ["null", "string"]
"type": "string",
"format": "date-time"
},
"url": {
"type": ["null", "string"]
@@ -31,6 +33,9 @@
},
"badge_url": {
"type": ["null", "string"]
},
"repository": {
"type": "string"
}
}
}

View File

@@ -179,6 +179,7 @@ class SourceGithub(AbstractSource):
page_size = config.get("page_size_for_large_streams", DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM)
organization_args = {"authenticator": authenticator, "organizations": organizations}
organization_args_with_start_date = {**organization_args, "start_date": config["start_date"]}
repository_args = {"authenticator": authenticator, "repositories": repositories, "page_size_for_large_streams": page_size}
repository_args_with_start_date = {**repository_args, "start_date": config["start_date"]}
@@ -214,7 +215,7 @@ class SourceGithub(AbstractSource):
PullRequestStats(parent=pull_requests_stream, **repository_args_with_start_date),
pull_requests_stream,
Releases(**repository_args_with_start_date),
Repositories(**organization_args),
Repositories(**organization_args_with_start_date),
ReviewComments(**repository_args_with_start_date),
Reviews(parent=pull_requests_stream, **repository_args_with_start_date),
Stargazers(**repository_args_with_start_date),

View File

@@ -8,6 +8,7 @@ from copy import deepcopy
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
from urllib import parse
import pendulum
import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
@@ -172,7 +173,7 @@ class GithubStream(HttpStream, ABC):
return record
class SemiIncrementalGithubStream(GithubStream):
class SemiIncrementalMixin:
"""
Semi incremental streams are also incremental but with one difference, they:
- read all records;
@@ -191,10 +192,19 @@ class SemiIncrementalGithubStream(GithubStream):
# supporting this.
is_sorted_descending = False
def __init__(self, start_date: str, **kwargs):
def __init__(self, start_date: str = "", **kwargs):
super().__init__(**kwargs)
self._start_date = start_date
@property
def __slice_key(self):
if hasattr(self, "repositories"):
return "repository"
return "organization"
def convert_cursor_value(self, value):
return value
@property
def state_checkpoint_interval(self) -> Optional[int]:
if not self.is_sorted_descending:
@@ -206,18 +216,18 @@ class SemiIncrementalGithubStream(GithubStream):
Return the latest state by comparing the cursor value in the latest record with the stream's most recent state
object and returning an updated state object.
"""
repository = latest_record["repository"]
updated_state = latest_record[self.cursor_field]
stream_state_value = current_stream_state.get(repository, {}).get(self.cursor_field)
slice_value = latest_record[self.__slice_key]
updated_state = self.convert_cursor_value(latest_record[self.cursor_field])
stream_state_value = current_stream_state.get(slice_value, {}).get(self.cursor_field)
if stream_state_value:
updated_state = max(updated_state, stream_state_value)
current_stream_state.setdefault(repository, {})[self.cursor_field] = updated_state
current_stream_state.setdefault(slice_value, {})[self.cursor_field] = updated_state
return current_stream_state
def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
if stream_state:
repository = stream_slice["repository"]
stream_state_value = stream_state.get(repository, {}).get(self.cursor_field)
slice_value = stream_slice[self.__slice_key]
stream_state_value = stream_state.get(slice_value, {}).get(self.cursor_field)
if stream_state_value:
return max(self._start_date, stream_state_value)
return self._start_date
@@ -233,13 +243,14 @@ class SemiIncrementalGithubStream(GithubStream):
for record in super().read_records(
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
):
if record[self.cursor_field] > start_point:
cursor_value = self.convert_cursor_value(record[self.cursor_field])
if cursor_value > start_point:
yield record
elif self.is_sorted_descending and record[self.cursor_field] < start_point:
elif self.is_sorted_descending and cursor_value < start_point:
break
class IncrementalGithubStream(SemiIncrementalGithubStream):
class IncrementalMixin(SemiIncrementalMixin):
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
params = super().request_params(stream_state=stream_state, **kwargs)
since_params = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
@@ -323,11 +334,17 @@ class Organizations(GithubStream):
return record
class Repositories(Organizations):
class Repositories(SemiIncrementalMixin, Organizations):
"""
API docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories
"""
is_sorted_descending = True
stream_base_params = {
"sort": "updated",
"direction": "desc",
}
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
return f"orgs/{stream_slice['organization']}/repos"
@@ -376,7 +393,7 @@ class Users(Organizations):
# Below are semi incremental streams
class Releases(SemiIncrementalGithubStream):
class Releases(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/repos#list-releases
"""
@@ -394,7 +411,7 @@ class Releases(SemiIncrementalGithubStream):
return record
class Events(SemiIncrementalGithubStream):
class Events(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/activity#list-repository-events
"""
@@ -402,7 +419,7 @@ class Events(SemiIncrementalGithubStream):
cursor_field = "created_at"
class PullRequests(SemiIncrementalGithubStream):
class PullRequests(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/pulls#list-pull-requests
"""
@@ -449,7 +466,7 @@ class PullRequests(SemiIncrementalGithubStream):
return not self._first_read
class CommitComments(SemiIncrementalGithubStream):
class CommitComments(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/repos#list-commit-comments-for-a-repository
"""
@@ -458,7 +475,7 @@ class CommitComments(SemiIncrementalGithubStream):
return f"repos/{stream_slice['repository']}/comments"
class IssueMilestones(SemiIncrementalGithubStream):
class IssueMilestones(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/issues#list-milestones
"""
@@ -474,7 +491,7 @@ class IssueMilestones(SemiIncrementalGithubStream):
return f"repos/{stream_slice['repository']}/milestones"
class Stargazers(SemiIncrementalGithubStream):
class Stargazers(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/activity#list-stargazers
"""
@@ -500,7 +517,7 @@ class Stargazers(SemiIncrementalGithubStream):
return record
class Projects(SemiIncrementalGithubStream):
class Projects(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/projects#list-repository-projects
"""
@@ -518,7 +535,7 @@ class Projects(SemiIncrementalGithubStream):
return {**base_headers, **headers}
class IssueEvents(SemiIncrementalGithubStream):
class IssueEvents(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/issues#list-issue-events-for-a-repository
"""
@@ -532,7 +549,7 @@ class IssueEvents(SemiIncrementalGithubStream):
# Below are incremental streams
class Comments(IncrementalGithubStream):
class Comments(IncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/issues#list-issue-comments-for-a-repository
"""
@@ -543,7 +560,7 @@ class Comments(IncrementalGithubStream):
return f"repos/{stream_slice['repository']}/issues/comments"
class Commits(IncrementalGithubStream):
class Commits(IncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/repos#list-commits
@@ -559,7 +576,7 @@ class Commits(IncrementalGithubStream):
self.default_branches = default_branches
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
params = super(IncrementalGithubStream, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
params = super(IncrementalMixin, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
params["since"] = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
params["sha"] = stream_slice["branch"]
return params
@@ -616,7 +633,7 @@ class Commits(IncrementalGithubStream):
return self._start_date
class Issues(IncrementalGithubStream):
class Issues(IncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/issues#list-repository-issues
"""
@@ -630,7 +647,7 @@ class Issues(IncrementalGithubStream):
}
class ReviewComments(IncrementalGithubStream):
class ReviewComments(IncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/pulls#list-review-comments-in-a-repository
"""
@@ -644,7 +661,7 @@ class ReviewComments(IncrementalGithubStream):
# Pull request substreams
class PullRequestSubstream(HttpSubStream, SemiIncrementalGithubStream, ABC):
class PullRequestSubstream(HttpSubStream, SemiIncrementalMixin, GithubStream, ABC):
use_cache = False
def __init__(self, parent: PullRequests, **kwargs):
@@ -675,9 +692,9 @@ class PullRequestSubstream(HttpSubStream, SemiIncrementalGithubStream, ABC):
) -> Iterable[Mapping[str, Any]]:
"""
We've already determined the list of pull requests to run the stream against.
Skip the start_point_map and cursor_field logic in SemiIncrementalGithubStream.read_records.
Skip the start_point_map and cursor_field logic in SemiIncrementalMixin.read_records.
"""
yield from super(SemiIncrementalGithubStream, self).read_records(
yield from super(SemiIncrementalMixin, self).read_records(
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
)
@@ -769,13 +786,15 @@ class PullRequestCommits(GithubStream):
class ReactionStream(GithubStream, ABC):
parent_key = "id"
copy_parent_key = "comment_id"
use_cache = False
cursor_field = "created_at"
def __init__(self, **kwargs):
self._stream_kwargs = deepcopy(kwargs)
self._parent_stream = self.parent_entity(**kwargs)
kwargs.pop("start_date", None)
def __init__(self, start_date: str = "", **kwargs):
super().__init__(**kwargs)
kwargs["start_date"] = start_date
self._parent_stream = self.parent_entity(**kwargs)
self._start_date = start_date
@property
@abstractmethod
@@ -786,12 +805,50 @@ class ReactionStream(GithubStream, ABC):
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
parent_path = self._parent_stream.path(stream_slice=stream_slice, **kwargs)
return f"{parent_path}/{stream_slice[self.parent_key]}/reactions"
return f"{parent_path}/{stream_slice[self.copy_parent_key]}/reactions"
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
for stream_slice in super().stream_slices(**kwargs):
for parent_record in self._parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
yield {self.parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]}
yield {self.copy_parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]}
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
repository = latest_record["repository"]
parent_id = str(latest_record[self.copy_parent_key])
updated_state = latest_record[self.cursor_field]
stream_state_value = current_stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
if stream_state_value:
updated_state = max(updated_state, stream_state_value)
current_stream_state.setdefault(repository, {}).setdefault(parent_id, {})[self.cursor_field] = updated_state
return current_stream_state
def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
if stream_state:
repository = stream_slice["repository"]
parent_id = str(stream_slice[self.copy_parent_key])
stream_state_value = stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
if stream_state_value:
return max(self._start_date, stream_state_value)
return self._start_date
def read_records(
self,
sync_mode: SyncMode,
cursor_field: List[str] = None,
stream_slice: Mapping[str, Any] = None,
stream_state: Mapping[str, Any] = None,
) -> Iterable[Mapping[str, Any]]:
starting_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
for record in super().read_records(
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
):
if record[self.cursor_field] > starting_point:
yield record
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
record = super().transform(record, stream_slice)
record[self.copy_parent_key] = stream_slice[self.copy_parent_key]
return record
class CommitCommentReactions(ReactionStream):
@@ -817,6 +874,7 @@ class IssueReactions(ReactionStream):
parent_entity = Issues
parent_key = "number"
copy_parent_key = "issue_number"
class PullRequestCommentReactions(ReactionStream):
@@ -827,7 +885,7 @@ class PullRequestCommentReactions(ReactionStream):
parent_entity = ReviewComments
class Deployments(SemiIncrementalGithubStream):
class Deployments(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/deployments#list-deployments
"""
@@ -975,7 +1033,7 @@ class ProjectCards(GithubStream):
return record
class Workflows(GithubStream):
class Workflows(SemiIncrementalMixin, GithubStream):
"""
Get all workflows of a GitHub repository
API documentation: https://docs.github.com/en/rest/reference/actions#workflows
@@ -987,7 +1045,10 @@ class Workflows(GithubStream):
def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
response = response.json().get("workflows")
for record in response:
yield record
yield self.transform(record=record, stream_slice=stream_slice)
def convert_cursor_value(self, value):
return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
class WorkflowRuns(GithubStream):

View File

@@ -30,7 +30,7 @@ def test_check_connection_repos_only():
@responses.activate
def test_check_connection_repos_and_org_repos():
repos = [{"name": f"name {i}", "full_name": f"full name {i}"} for i in range(1000)]
repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)]
responses.add("GET", "https://api.github.com/repos/airbyte/test", json={})
responses.add("GET", "https://api.github.com/repos/airbyte/test2", json={})
responses.add("GET", "https://api.github.com/orgs/airbytehq/repos", json=repos)
@@ -45,7 +45,7 @@ def test_check_connection_repos_and_org_repos():
@responses.activate
def test_check_connection_org_only():
repos = [{"name": f"name {i}", "full_name": f"full name {i}"} for i in range(1000)]
repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)]
responses.add("GET", "https://api.github.com/orgs/airbytehq/repos", json=repos)
status = check_source("airbytehq/*")
@@ -107,8 +107,8 @@ def test_generate_repositories():
"GET",
"https://api.github.com/orgs/docker/repos",
json=[
{"full_name": "docker/docker-py"},
{"full_name": "docker/compose"},
{"full_name": "docker/docker-py", "updated_at": "2020-01-01T00:00:00Z"},
{"full_name": "docker/compose", "updated_at": "2020-01-01T00:00:00Z"},
],
)

View File

@@ -14,6 +14,7 @@ from source_github.streams import (
Branches,
Collaborators,
Comments,
CommitCommentReactions,
CommitComments,
Commits,
Deployments,
@@ -48,7 +49,7 @@ DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80]
def test_internal_server_error_retry(time_mock):
args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30}
stream = PullRequestCommentReactions(**args)
stream_slice = {"repository": "test_repo", "id": "id"}
stream_slice = {"repository": "test_repo", "comment_id": "id"}
time_mock.reset_mock()
responses.add(
@@ -157,20 +158,27 @@ def test_stream_repositories_404():
assert read_full_refresh(stream) == []
assert len(responses.calls) == 1
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100"
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
@responses.activate
def test_stream_repositories_read():
organization_args = {"organizations": ["org1", "org2"]}
stream = Repositories(**organization_args)
responses.add("GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1}, {"id": 2}])
responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3}])
updated_at = "2020-01-01T00:00:00Z"
responses.add(
"GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1, "updated_at": updated_at}, {"id": 2, "updated_at": updated_at}]
)
responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3, "updated_at": updated_at}])
records = read_full_refresh(stream)
assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
assert records == [
{"id": 1, "organization": "org1", "updated_at": updated_at},
{"id": 2, "organization": "org1", "updated_at": updated_at},
{"id": 3, "organization": "org2", "updated_at": updated_at},
]
assert len(responses.calls) == 2
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100"
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100"
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100&sort=updated&direction=desc"
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100&sort=updated&direction=desc"
@responses.activate
@@ -777,3 +785,82 @@ def test_stream_team_members_full_refresh():
{"username": "login2", "organization": "org1", "team_slug": "team1"},
{"username": "login2", "organization": "org1", "team_slug": "team2"},
]
@responses.activate
def test_stream_commit_comment_reactions_incremental_read():
repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100}
stream = CommitCommentReactions(**repository_args)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments",
json=[
{"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
{"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538825/reactions",
json=[
{"id": 154935429, "created_at": "2022-01-01T15:00:00Z"},
{"id": 154935430, "created_at": "2022-01-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
json=[{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}],
)
stream_state = {}
records = read_incremental(stream, stream_state)
assert stream_state == {
"airbytehq/integration-test": {
"55538825": {"created_at": "2022-01-01T16:00:00Z"},
"55538826": {"created_at": "2022-01-01T17:00:00Z"},
}
}
assert records == [
{"id": 154935429, "comment_id": 55538825, "created_at": "2022-01-01T15:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935430, "comment_id": 55538825, "created_at": "2022-01-01T16:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935431, "comment_id": 55538826, "created_at": "2022-01-01T17:00:00Z", "repository": "airbytehq/integration-test"},
]
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments",
json=[
{"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
{"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
{"id": 55538827, "updated_at": "2022-02-01T15:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
json=[
{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"},
{"id": 154935432, "created_at": "2022-02-01T16:00:00Z"},
],
)
responses.add(
"GET",
"https://api.github.com/repos/airbytehq/integration-test/comments/55538827/reactions",
json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}],
)
records = read_incremental(stream, stream_state)
assert records == [
{"id": 154935432, "comment_id": 55538826, "created_at": "2022-02-01T16:00:00Z", "repository": "airbytehq/integration-test"},
{"id": 154935433, "comment_id": 55538827, "created_at": "2022-02-01T17:00:00Z", "repository": "airbytehq/integration-test"},
]

View File

@@ -113,6 +113,7 @@ Your token should have at least the `repo` scope. Depending on which streams you
| Version | Date | Pull Request | Subject |
|:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------|
| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` |
| 0.2.29 | 2022-05-04 | [\#12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` |
| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec |