🐛 Source Github: add incremental sync support for repositories, workflows, pull_request_comment_reactions, issue_reactions, issue_comment_reactions, commit_comment_reactions (#12294)
Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
This commit is contained in:
@@ -295,7 +295,7 @@
|
||||
- name: GitHub
|
||||
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
|
||||
dockerRepository: airbyte/source-github
|
||||
dockerImageTag: 0.2.29
|
||||
dockerImageTag: 0.2.30
|
||||
documentationUrl: https://docs.airbyte.io/integrations/sources/github
|
||||
icon: github.svg
|
||||
sourceType: api
|
||||
|
||||
@@ -2503,7 +2503,7 @@
|
||||
supportsNormalization: false
|
||||
supportsDBT: false
|
||||
supported_destination_sync_modes: []
|
||||
- dockerImage: "airbyte/source-github:0.2.29"
|
||||
- dockerImage: "airbyte/source-github:0.2.30"
|
||||
spec:
|
||||
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
|
||||
connectionSpecification:
|
||||
|
||||
@@ -12,5 +12,5 @@ RUN pip install .
|
||||
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
|
||||
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
|
||||
|
||||
LABEL io.airbyte.version=0.2.29
|
||||
LABEL io.airbyte.version=0.2.30
|
||||
LABEL io.airbyte.name=airbyte/source-github
|
||||
|
||||
@@ -21,24 +21,30 @@ tests:
|
||||
future_state_path: "integration_tests/abnormal_state.json"
|
||||
cursor_paths:
|
||||
comments: ["airbytehq/integration-test", "updated_at"]
|
||||
commit_comment_reactions: ["airbytehq/integration-test", "55538825", "created_at"]
|
||||
commit_comments: ["airbytehq/integration-test", "updated_at"]
|
||||
commits: ["airbytehq/integration-test", "master", "created_at"]
|
||||
deployments: ["airbytehq/integration-test", "updated_at"]
|
||||
events: ["airbytehq/integration-test", "created_at"]
|
||||
issue_comment_reactions: ["airbytehq/integration-test", "907296275", "created_at"]
|
||||
issue_events: ["airbytehq/integration-test", "created_at"]
|
||||
issue_milestones: ["airbytehq/integration-test", "updated_at"]
|
||||
issue_reactions: ["airbytehq/integration-test", "11", "created_at"]
|
||||
issues: ["airbytehq/integration-test", "updated_at"]
|
||||
project_cards:
|
||||
["airbytehq/integration-test", "13167124", "17807006", "updated_at"]
|
||||
project_columns:
|
||||
["airbytehq/integration-test", "13167124", "updated_at"]
|
||||
projects: ["airbytehq/integration-test", "updated_at"]
|
||||
pull_request_comment_reactions: ["airbytehq/integration-test", "699253726", "created_at"]
|
||||
pull_request_stats: ["airbytehq/integration-test", "updated_at"]
|
||||
pull_requests: ["airbytehq/integration-test", "updated_at"]
|
||||
releases: ["airbytehq/integration-test", "created_at"]
|
||||
repositories: ["airbytehq", "updated_at"]
|
||||
review_comments: ["airbytehq/integration-test", "updated_at"]
|
||||
reviews: ["airbytehq/integration-test", "pull_request_updated_at"]
|
||||
stargazers: ["airbytehq/integration-test", "starred_at"]
|
||||
workflows: ["airbytehq/integration-test", "updated_at"]
|
||||
full_refresh:
|
||||
- config_path: "secrets/config.json"
|
||||
configured_catalog_path: "integration_tests/configured_catalog.json"
|
||||
|
||||
@@ -4,6 +4,16 @@
|
||||
"updated_at": "2121-06-30T10:22:10Z"
|
||||
}
|
||||
},
|
||||
"commit_comment_reactions": {
|
||||
"airbytehq/integration-test": {
|
||||
"55538825": {
|
||||
"created_at": "2121-12-31T23:59:59Z"
|
||||
},
|
||||
"55538840": {
|
||||
"created_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
"commit_comments": {
|
||||
"airbytehq/integration-test": {
|
||||
"updated_at": "2121-04-30T20:36:17Z"
|
||||
@@ -24,6 +34,13 @@
|
||||
"created_at": "2121-06-29T03:44:45Z"
|
||||
}
|
||||
},
|
||||
"issue_comment_reactions": {
|
||||
"airbytehq/integration-test": {
|
||||
"907296275": {
|
||||
"created_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
"issue_events": {
|
||||
"airbytehq/integration-test": {
|
||||
"created_at": "2121-06-29T01:49:42Z"
|
||||
@@ -34,6 +51,13 @@
|
||||
"updated_at": "2121-06-25T22:28:33Z"
|
||||
}
|
||||
},
|
||||
"issue_reactions": {
|
||||
"airbytehq/integration-test": {
|
||||
"11": {
|
||||
"created_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
"issues": {
|
||||
"airbytehq/integration-test": {
|
||||
"updated_at": "2121-06-30T06:44:42Z"
|
||||
@@ -63,6 +87,13 @@
|
||||
"updated_at": "2121-06-28T17:24:51Z"
|
||||
}
|
||||
},
|
||||
"pull_request_comment_reactions": {
|
||||
"airbytehq/integration-test": {
|
||||
"699253726": {
|
||||
"created_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
}
|
||||
},
|
||||
"pull_request_stats": {
|
||||
"airbytehq/integration-test": {
|
||||
"updated_at": "2121-06-29T02:04:57Z"
|
||||
@@ -78,6 +109,11 @@
|
||||
"created_at": "2121-06-23T23:57:07Z"
|
||||
}
|
||||
},
|
||||
"repositories": {
|
||||
"airbytehq": {
|
||||
"updated_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
},
|
||||
"review_comments": {
|
||||
"airbytehq/integration-test": {
|
||||
"updated_at": "2121-06-23T23:57:07Z"
|
||||
@@ -92,5 +128,10 @@
|
||||
"airbytehq/integration-test": {
|
||||
"starred_at": "2121-06-29T02:04:57Z"
|
||||
}
|
||||
},
|
||||
"workflows": {
|
||||
"airbytehq/integration-test": {
|
||||
"updated_at": "2121-12-31T23:59:59Z"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -47,10 +47,10 @@
|
||||
"stream": {
|
||||
"name": "commit_comment_reactions",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
@@ -109,10 +109,10 @@
|
||||
"stream": {
|
||||
"name": "issue_comment_reactions",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
@@ -155,10 +155,10 @@
|
||||
"stream": {
|
||||
"name": "issue_reactions",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
@@ -227,10 +227,10 @@
|
||||
"stream": {
|
||||
"name": "pull_request_comment_reactions",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
@@ -286,10 +286,10 @@
|
||||
"stream": {
|
||||
"name": "repositories",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
@@ -365,10 +365,10 @@
|
||||
"stream": {
|
||||
"name": "workflows",
|
||||
"json_schema": {},
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"supported_sync_modes": ["full_refresh", "incremental"],
|
||||
"source_defined_primary_key": [["id"]]
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"sync_mode": "incremental",
|
||||
"destination_sync_mode": "overwrite"
|
||||
},
|
||||
{
|
||||
|
||||
@@ -8,6 +8,7 @@ from setuptools import find_packages, setup
|
||||
MAIN_REQUIREMENTS = [
|
||||
"airbyte-cdk~=0.1.33",
|
||||
"vcrpy==4.1.1",
|
||||
"pendulum~=2.1.2",
|
||||
]
|
||||
|
||||
TEST_REQUIREMENTS = ["pytest~=6.1", "source-acceptance-test", "responses~=0.19.0"]
|
||||
|
||||
@@ -1,4 +1,28 @@
|
||||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"$ref": "reaction.json"
|
||||
"type": ["null", "object"],
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": ["null", "integer"]
|
||||
},
|
||||
"node_id": {
|
||||
"type": ["null", "string"]
|
||||
},
|
||||
"content": {
|
||||
"type": ["null", "string"]
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"user": {
|
||||
"$ref": "user.json"
|
||||
},
|
||||
"repository": {
|
||||
"type": "string"
|
||||
},
|
||||
"issue_number": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -244,7 +244,7 @@
|
||||
"format": "date-time"
|
||||
},
|
||||
"updated_at": {
|
||||
"type": ["null", "string"],
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"permissions": {
|
||||
|
||||
@@ -11,14 +11,17 @@
|
||||
"type": ["null", "string"]
|
||||
},
|
||||
"created_at": {
|
||||
"type": ["null", "string"],
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"user": {
|
||||
"$ref": "user.json"
|
||||
},
|
||||
"repository": {
|
||||
"type": ["null", "string"]
|
||||
"type": "string"
|
||||
},
|
||||
"comment_id": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,10 +18,12 @@
|
||||
"type": ["null", "string"]
|
||||
},
|
||||
"created_at": {
|
||||
"type": ["null", "string"]
|
||||
"type": ["null", "string"],
|
||||
"format": "date-time"
|
||||
},
|
||||
"updated_at": {
|
||||
"type": ["null", "string"]
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"url": {
|
||||
"type": ["null", "string"]
|
||||
@@ -31,6 +33,9 @@
|
||||
},
|
||||
"badge_url": {
|
||||
"type": ["null", "string"]
|
||||
},
|
||||
"repository": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,6 +179,7 @@ class SourceGithub(AbstractSource):
|
||||
page_size = config.get("page_size_for_large_streams", DEFAULT_PAGE_SIZE_FOR_LARGE_STREAM)
|
||||
|
||||
organization_args = {"authenticator": authenticator, "organizations": organizations}
|
||||
organization_args_with_start_date = {**organization_args, "start_date": config["start_date"]}
|
||||
repository_args = {"authenticator": authenticator, "repositories": repositories, "page_size_for_large_streams": page_size}
|
||||
repository_args_with_start_date = {**repository_args, "start_date": config["start_date"]}
|
||||
|
||||
@@ -214,7 +215,7 @@ class SourceGithub(AbstractSource):
|
||||
PullRequestStats(parent=pull_requests_stream, **repository_args_with_start_date),
|
||||
pull_requests_stream,
|
||||
Releases(**repository_args_with_start_date),
|
||||
Repositories(**organization_args),
|
||||
Repositories(**organization_args_with_start_date),
|
||||
ReviewComments(**repository_args_with_start_date),
|
||||
Reviews(parent=pull_requests_stream, **repository_args_with_start_date),
|
||||
Stargazers(**repository_args_with_start_date),
|
||||
|
||||
@@ -8,6 +8,7 @@ from copy import deepcopy
|
||||
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
||||
from urllib import parse
|
||||
|
||||
import pendulum
|
||||
import requests
|
||||
from airbyte_cdk.models import SyncMode
|
||||
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
|
||||
@@ -172,7 +173,7 @@ class GithubStream(HttpStream, ABC):
|
||||
return record
|
||||
|
||||
|
||||
class SemiIncrementalGithubStream(GithubStream):
|
||||
class SemiIncrementalMixin:
|
||||
"""
|
||||
Semi incremental streams are also incremental but with one difference, they:
|
||||
- read all records;
|
||||
@@ -191,10 +192,19 @@ class SemiIncrementalGithubStream(GithubStream):
|
||||
# supporting this.
|
||||
is_sorted_descending = False
|
||||
|
||||
def __init__(self, start_date: str, **kwargs):
|
||||
def __init__(self, start_date: str = "", **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self._start_date = start_date
|
||||
|
||||
@property
|
||||
def __slice_key(self):
|
||||
if hasattr(self, "repositories"):
|
||||
return "repository"
|
||||
return "organization"
|
||||
|
||||
def convert_cursor_value(self, value):
    """
    Hook applied to a record's cursor value before it is compared or stored.

    The default implementation returns ``value`` unchanged; a stream can override it
    to normalise the cursor representation.
    """
    return value
|
||||
|
||||
@property
|
||||
def state_checkpoint_interval(self) -> Optional[int]:
|
||||
if not self.is_sorted_descending:
|
||||
@@ -206,18 +216,18 @@ class SemiIncrementalGithubStream(GithubStream):
|
||||
Return the latest state by comparing the cursor value in the latest record with the stream's most recent state
|
||||
object and returning an updated state object.
|
||||
"""
|
||||
repository = latest_record["repository"]
|
||||
updated_state = latest_record[self.cursor_field]
|
||||
stream_state_value = current_stream_state.get(repository, {}).get(self.cursor_field)
|
||||
slice_value = latest_record[self.__slice_key]
|
||||
updated_state = self.convert_cursor_value(latest_record[self.cursor_field])
|
||||
stream_state_value = current_stream_state.get(slice_value, {}).get(self.cursor_field)
|
||||
if stream_state_value:
|
||||
updated_state = max(updated_state, stream_state_value)
|
||||
current_stream_state.setdefault(repository, {})[self.cursor_field] = updated_state
|
||||
current_stream_state.setdefault(slice_value, {})[self.cursor_field] = updated_state
|
||||
return current_stream_state
|
||||
|
||||
def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
|
||||
if stream_state:
|
||||
repository = stream_slice["repository"]
|
||||
stream_state_value = stream_state.get(repository, {}).get(self.cursor_field)
|
||||
slice_value = stream_slice[self.__slice_key]
|
||||
stream_state_value = stream_state.get(slice_value, {}).get(self.cursor_field)
|
||||
if stream_state_value:
|
||||
return max(self._start_date, stream_state_value)
|
||||
return self._start_date
|
||||
@@ -233,13 +243,14 @@ class SemiIncrementalGithubStream(GithubStream):
|
||||
for record in super().read_records(
|
||||
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
||||
):
|
||||
if record[self.cursor_field] > start_point:
|
||||
cursor_value = self.convert_cursor_value(record[self.cursor_field])
|
||||
if cursor_value > start_point:
|
||||
yield record
|
||||
elif self.is_sorted_descending and record[self.cursor_field] < start_point:
|
||||
elif self.is_sorted_descending and cursor_value < start_point:
|
||||
break
|
||||
|
||||
|
||||
class IncrementalGithubStream(SemiIncrementalGithubStream):
|
||||
class IncrementalMixin(SemiIncrementalMixin):
|
||||
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
|
||||
params = super().request_params(stream_state=stream_state, **kwargs)
|
||||
since_params = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
|
||||
@@ -323,11 +334,17 @@ class Organizations(GithubStream):
|
||||
return record
|
||||
|
||||
|
||||
class Repositories(Organizations):
|
||||
class Repositories(SemiIncrementalMixin, Organizations):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/repos#list-organization-repositories
|
||||
"""
|
||||
|
||||
is_sorted_descending = True
|
||||
stream_base_params = {
|
||||
"sort": "updated",
|
||||
"direction": "desc",
|
||||
}
|
||||
|
||||
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
    """
    Build the request path for listing an organization's repositories.

    :param stream_slice: mapping that must contain the ``"organization"`` key.
    :return: ``orgs/<organization>/repos``.
    """
    return f"orgs/{stream_slice['organization']}/repos"
|
||||
|
||||
@@ -376,7 +393,7 @@ class Users(Organizations):
|
||||
# Below are semi incremental streams
|
||||
|
||||
|
||||
class Releases(SemiIncrementalGithubStream):
|
||||
class Releases(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/repos#list-releases
|
||||
"""
|
||||
@@ -394,7 +411,7 @@ class Releases(SemiIncrementalGithubStream):
|
||||
return record
|
||||
|
||||
|
||||
class Events(SemiIncrementalGithubStream):
|
||||
class Events(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/activity#list-repository-events
|
||||
"""
|
||||
@@ -402,7 +419,7 @@ class Events(SemiIncrementalGithubStream):
|
||||
cursor_field = "created_at"
|
||||
|
||||
|
||||
class PullRequests(SemiIncrementalGithubStream):
|
||||
class PullRequests(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/pulls#list-pull-requests
|
||||
"""
|
||||
@@ -449,7 +466,7 @@ class PullRequests(SemiIncrementalGithubStream):
|
||||
return not self._first_read
|
||||
|
||||
|
||||
class CommitComments(SemiIncrementalGithubStream):
|
||||
class CommitComments(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/repos#list-commit-comments-for-a-repository
|
||||
"""
|
||||
@@ -458,7 +475,7 @@ class CommitComments(SemiIncrementalGithubStream):
|
||||
return f"repos/{stream_slice['repository']}/comments"
|
||||
|
||||
|
||||
class IssueMilestones(SemiIncrementalGithubStream):
|
||||
class IssueMilestones(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/issues#list-milestones
|
||||
"""
|
||||
@@ -474,7 +491,7 @@ class IssueMilestones(SemiIncrementalGithubStream):
|
||||
return f"repos/{stream_slice['repository']}/milestones"
|
||||
|
||||
|
||||
class Stargazers(SemiIncrementalGithubStream):
|
||||
class Stargazers(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/activity#list-stargazers
|
||||
"""
|
||||
@@ -500,7 +517,7 @@ class Stargazers(SemiIncrementalGithubStream):
|
||||
return record
|
||||
|
||||
|
||||
class Projects(SemiIncrementalGithubStream):
|
||||
class Projects(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/projects#list-repository-projects
|
||||
"""
|
||||
@@ -518,7 +535,7 @@ class Projects(SemiIncrementalGithubStream):
|
||||
return {**base_headers, **headers}
|
||||
|
||||
|
||||
class IssueEvents(SemiIncrementalGithubStream):
|
||||
class IssueEvents(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/issues#list-issue-events-for-a-repository
|
||||
"""
|
||||
@@ -532,7 +549,7 @@ class IssueEvents(SemiIncrementalGithubStream):
|
||||
# Below are incremental streams
|
||||
|
||||
|
||||
class Comments(IncrementalGithubStream):
|
||||
class Comments(IncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/issues#list-issue-comments-for-a-repository
|
||||
"""
|
||||
@@ -543,7 +560,7 @@ class Comments(IncrementalGithubStream):
|
||||
return f"repos/{stream_slice['repository']}/issues/comments"
|
||||
|
||||
|
||||
class Commits(IncrementalGithubStream):
|
||||
class Commits(IncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/repos#list-commits
|
||||
|
||||
@@ -559,7 +576,7 @@ class Commits(IncrementalGithubStream):
|
||||
self.default_branches = default_branches
|
||||
|
||||
def request_params(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
|
||||
params = super(IncrementalGithubStream, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
|
||||
params = super(IncrementalMixin, self).request_params(stream_state=stream_state, stream_slice=stream_slice, **kwargs)
|
||||
params["since"] = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
|
||||
params["sha"] = stream_slice["branch"]
|
||||
return params
|
||||
@@ -616,7 +633,7 @@ class Commits(IncrementalGithubStream):
|
||||
return self._start_date
|
||||
|
||||
|
||||
class Issues(IncrementalGithubStream):
|
||||
class Issues(IncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/issues#list-repository-issues
|
||||
"""
|
||||
@@ -630,7 +647,7 @@ class Issues(IncrementalGithubStream):
|
||||
}
|
||||
|
||||
|
||||
class ReviewComments(IncrementalGithubStream):
|
||||
class ReviewComments(IncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/pulls#list-review-comments-in-a-repository
|
||||
"""
|
||||
@@ -644,7 +661,7 @@ class ReviewComments(IncrementalGithubStream):
|
||||
# Pull request substreams
|
||||
|
||||
|
||||
class PullRequestSubstream(HttpSubStream, SemiIncrementalGithubStream, ABC):
|
||||
class PullRequestSubstream(HttpSubStream, SemiIncrementalMixin, GithubStream, ABC):
|
||||
use_cache = False
|
||||
|
||||
def __init__(self, parent: PullRequests, **kwargs):
|
||||
@@ -675,9 +692,9 @@ class PullRequestSubstream(HttpSubStream, SemiIncrementalGithubStream, ABC):
|
||||
) -> Iterable[Mapping[str, Any]]:
|
||||
"""
|
||||
We've already determined the list of pull requests to run the stream against.
|
||||
Skip the start_point_map and cursor_field logic in SemiIncrementalGithubStream.read_records.
|
||||
Skip the start_point_map and cursor_field logic in SemiIncrementalMixin.read_records.
|
||||
"""
|
||||
yield from super(SemiIncrementalGithubStream, self).read_records(
|
||||
yield from super(SemiIncrementalMixin, self).read_records(
|
||||
sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
|
||||
)
|
||||
|
||||
@@ -769,13 +786,15 @@ class PullRequestCommits(GithubStream):
|
||||
class ReactionStream(GithubStream, ABC):
|
||||
|
||||
parent_key = "id"
|
||||
copy_parent_key = "comment_id"
|
||||
use_cache = False
|
||||
cursor_field = "created_at"
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
self._stream_kwargs = deepcopy(kwargs)
|
||||
self._parent_stream = self.parent_entity(**kwargs)
|
||||
kwargs.pop("start_date", None)
|
||||
def __init__(self, start_date: str = "", **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
kwargs["start_date"] = start_date
|
||||
self._parent_stream = self.parent_entity(**kwargs)
|
||||
self._start_date = start_date
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
@@ -786,12 +805,50 @@ class ReactionStream(GithubStream, ABC):
|
||||
|
||||
def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
|
||||
parent_path = self._parent_stream.path(stream_slice=stream_slice, **kwargs)
|
||||
return f"{parent_path}/{stream_slice[self.parent_key]}/reactions"
|
||||
return f"{parent_path}/{stream_slice[self.copy_parent_key]}/reactions"
|
||||
|
||||
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
|
||||
for stream_slice in super().stream_slices(**kwargs):
|
||||
for parent_record in self._parent_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
|
||||
yield {self.parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]}
|
||||
yield {self.copy_parent_key: parent_record[self.parent_key], "repository": stream_slice["repository"]}
|
||||
|
||||
def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]):
    """
    Fold the cursor value of ``latest_record`` into the stream state.

    State is nested as ``{repository: {parent_id: {cursor_field: value}}}``. The parent id is
    read from ``latest_record[self.copy_parent_key]`` and stringified before being used as a key.

    :param current_stream_state: state accumulated so far; it is updated in place.
    :param latest_record: record that must carry ``"repository"``, ``self.copy_parent_key``
        and ``self.cursor_field``.
    :return: the same ``current_stream_state`` object, after the update.
    """
    repository = latest_record["repository"]
    parent_id = str(latest_record[self.copy_parent_key])
    updated_state = latest_record[self.cursor_field]
    stream_state_value = current_stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
    if stream_state_value:
        # Keep the larger of the two values so the stored cursor never moves backwards.
        updated_state = max(updated_state, stream_state_value)
    current_stream_state.setdefault(repository, {}).setdefault(parent_id, {})[self.cursor_field] = updated_state
    return current_stream_state
|
||||
|
||||
def get_starting_point(self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any]) -> str:
    """
    Return the cursor value from which records of ``stream_slice`` should be emitted.

    The stored cursor is looked up under ``stream_state[repository][str(parent_id)][cursor_field]``.

    :param stream_state: previously saved state; may be empty or ``None``.
    :param stream_slice: mapping that carries ``"repository"`` and ``self.copy_parent_key``.
    :return: the later of ``self._start_date`` and the stored cursor, or ``self._start_date``
        when nothing is stored for this repository/parent pair.
    """
    if stream_state:
        repository = stream_slice["repository"]
        parent_id = str(stream_slice[self.copy_parent_key])
        stream_state_value = stream_state.get(repository, {}).get(parent_id, {}).get(self.cursor_field)
        if stream_state_value:
            return max(self._start_date, stream_state_value)
    return self._start_date
|
||||
|
||||
def read_records(
    self,
    sync_mode: SyncMode,
    cursor_field: List[str] = None,
    stream_slice: Mapping[str, Any] = None,
    stream_state: Mapping[str, Any] = None,
) -> Iterable[Mapping[str, Any]]:
    """
    Yield the records of one slice whose cursor value is strictly greater than the starting point.

    The starting point comes from ``get_starting_point`` for this ``stream_state`` and
    ``stream_slice``. Records at or before it are dropped here, after the parent class
    has read them.
    """
    starting_point = self.get_starting_point(stream_state=stream_state, stream_slice=stream_slice)
    for record in super().read_records(
        sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state
    ):
        # Strict comparison: a record whose cursor equals the starting point is not emitted again.
        if record[self.cursor_field] > starting_point:
            yield record
|
||||
|
||||
def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]:
    """
    Apply the parent class transformation, then copy the parent identifier onto the record.

    The value is taken from ``stream_slice[self.copy_parent_key]`` and stored under the same
    key on the record; ``get_updated_state`` reads that key to locate the per-parent state.
    """
    record = super().transform(record, stream_slice)
    record[self.copy_parent_key] = stream_slice[self.copy_parent_key]
    return record
|
||||
|
||||
|
||||
class CommitCommentReactions(ReactionStream):
|
||||
@@ -817,6 +874,7 @@ class IssueReactions(ReactionStream):
|
||||
|
||||
parent_entity = Issues
|
||||
parent_key = "number"
|
||||
copy_parent_key = "issue_number"
|
||||
|
||||
|
||||
class PullRequestCommentReactions(ReactionStream):
|
||||
@@ -827,7 +885,7 @@ class PullRequestCommentReactions(ReactionStream):
|
||||
parent_entity = ReviewComments
|
||||
|
||||
|
||||
class Deployments(SemiIncrementalGithubStream):
|
||||
class Deployments(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/deployments#list-deployments
|
||||
"""
|
||||
@@ -975,7 +1033,7 @@ class ProjectCards(GithubStream):
|
||||
return record
|
||||
|
||||
|
||||
class Workflows(GithubStream):
|
||||
class Workflows(SemiIncrementalMixin, GithubStream):
|
||||
"""
|
||||
Get all workflows of a GitHub repository
|
||||
API documentation: https://docs.github.com/en/rest/reference/actions#workflows
|
||||
@@ -987,7 +1045,10 @@ class Workflows(GithubStream):
|
||||
def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping]:
|
||||
response = response.json().get("workflows")
|
||||
for record in response:
|
||||
yield record
|
||||
yield self.transform(record=record, stream_slice=stream_slice)
|
||||
|
||||
def convert_cursor_value(self, value):
    """
    Normalise a timestamp string to UTC, formatted as ``YYYY-MM-DDTHH:mm:ss[Z]``.

    NOTE(review): presumably needed because this endpoint can return timestamps with a
    non-UTC offset, which would not compare correctly as plain strings - confirm against the API.
    """
    return pendulum.parse(value).in_tz(tz="UTC").format("YYYY-MM-DDTHH:mm:ss[Z]")
|
||||
|
||||
|
||||
class WorkflowRuns(GithubStream):
|
||||
|
||||
@@ -30,7 +30,7 @@ def test_check_connection_repos_only():
|
||||
|
||||
@responses.activate
|
||||
def test_check_connection_repos_and_org_repos():
|
||||
repos = [{"name": f"name {i}", "full_name": f"full name {i}"} for i in range(1000)]
|
||||
repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)]
|
||||
responses.add("GET", "https://api.github.com/repos/airbyte/test", json={})
|
||||
responses.add("GET", "https://api.github.com/repos/airbyte/test2", json={})
|
||||
responses.add("GET", "https://api.github.com/orgs/airbytehq/repos", json=repos)
|
||||
@@ -45,7 +45,7 @@ def test_check_connection_repos_and_org_repos():
|
||||
|
||||
@responses.activate
|
||||
def test_check_connection_org_only():
|
||||
repos = [{"name": f"name {i}", "full_name": f"full name {i}"} for i in range(1000)]
|
||||
repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)]
|
||||
responses.add("GET", "https://api.github.com/orgs/airbytehq/repos", json=repos)
|
||||
|
||||
status = check_source("airbytehq/*")
|
||||
@@ -107,8 +107,8 @@ def test_generate_repositories():
|
||||
"GET",
|
||||
"https://api.github.com/orgs/docker/repos",
|
||||
json=[
|
||||
{"full_name": "docker/docker-py"},
|
||||
{"full_name": "docker/compose"},
|
||||
{"full_name": "docker/docker-py", "updated_at": "2020-01-01T00:00:00Z"},
|
||||
{"full_name": "docker/compose", "updated_at": "2020-01-01T00:00:00Z"},
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ from source_github.streams import (
|
||||
Branches,
|
||||
Collaborators,
|
||||
Comments,
|
||||
CommitCommentReactions,
|
||||
CommitComments,
|
||||
Commits,
|
||||
Deployments,
|
||||
@@ -48,7 +49,7 @@ DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80]
|
||||
def test_internal_server_error_retry(time_mock):
|
||||
args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30}
|
||||
stream = PullRequestCommentReactions(**args)
|
||||
stream_slice = {"repository": "test_repo", "id": "id"}
|
||||
stream_slice = {"repository": "test_repo", "comment_id": "id"}
|
||||
|
||||
time_mock.reset_mock()
|
||||
responses.add(
|
||||
@@ -157,20 +158,27 @@ def test_stream_repositories_404():
|
||||
|
||||
assert read_full_refresh(stream) == []
|
||||
assert len(responses.calls) == 1
|
||||
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100"
|
||||
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100&sort=updated&direction=desc"
|
||||
|
||||
|
||||
@responses.activate
|
||||
def test_stream_repositories_read():
|
||||
organization_args = {"organizations": ["org1", "org2"]}
|
||||
stream = Repositories(**organization_args)
|
||||
responses.add("GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1}, {"id": 2}])
|
||||
responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3}])
|
||||
updated_at = "2020-01-01T00:00:00Z"
|
||||
responses.add(
|
||||
"GET", "https://api.github.com/orgs/org1/repos", json=[{"id": 1, "updated_at": updated_at}, {"id": 2, "updated_at": updated_at}]
|
||||
)
|
||||
responses.add("GET", "https://api.github.com/orgs/org2/repos", json=[{"id": 3, "updated_at": updated_at}])
|
||||
records = read_full_refresh(stream)
|
||||
assert records == [{"id": 1, "organization": "org1"}, {"id": 2, "organization": "org1"}, {"id": 3, "organization": "org2"}]
|
||||
assert records == [
|
||||
{"id": 1, "organization": "org1", "updated_at": updated_at},
|
||||
{"id": 2, "organization": "org1", "updated_at": updated_at},
|
||||
{"id": 3, "organization": "org2", "updated_at": updated_at},
|
||||
]
|
||||
assert len(responses.calls) == 2
|
||||
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100"
|
||||
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100"
|
||||
assert responses.calls[0].request.url == "https://api.github.com/orgs/org1/repos?per_page=100&sort=updated&direction=desc"
|
||||
assert responses.calls[1].request.url == "https://api.github.com/orgs/org2/repos?per_page=100&sort=updated&direction=desc"
|
||||
|
||||
|
||||
@responses.activate
|
||||
@@ -777,3 +785,82 @@ def test_stream_team_members_full_refresh():
|
||||
{"username": "login2", "organization": "org1", "team_slug": "team1"},
|
||||
{"username": "login2", "organization": "org1", "team_slug": "team2"},
|
||||
]
|
||||
|
||||
|
||||
@responses.activate
def test_stream_commit_comment_reactions_incremental_read():
    """
    Two consecutive incremental reads of ``CommitCommentReactions`` against mocked endpoints.

    The first read starts from an empty state and must emit every reaction; the second read
    reuses the resulting state and must emit only reactions newer than the stored cursors.
    """
    repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100}
    stream = CommitCommentReactions(**repository_args)

    # First sync: two parent commit comments, each with its own reactions endpoint.
    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments",
        json=[
            {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
            {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
        ],
    )

    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments/55538825/reactions",
        json=[
            {"id": 154935429, "created_at": "2022-01-01T15:00:00Z"},
            {"id": 154935430, "created_at": "2022-01-01T16:00:00Z"},
        ],
    )

    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
        json=[{"id": 154935431, "created_at": "2022-01-01T17:00:00Z"}],
    )

    stream_state = {}
    records = read_incremental(stream, stream_state)

    # State is keyed by repository, then by the stringified parent comment id.
    assert stream_state == {
        "airbytehq/integration-test": {
            "55538825": {"created_at": "2022-01-01T16:00:00Z"},
            "55538826": {"created_at": "2022-01-01T17:00:00Z"},
        }
    }

    assert records == [
        {"id": 154935429, "comment_id": 55538825, "created_at": "2022-01-01T15:00:00Z", "repository": "airbytehq/integration-test"},
        {"id": 154935430, "comment_id": 55538825, "created_at": "2022-01-01T16:00:00Z", "repository": "airbytehq/integration-test"},
        {"id": 154935431, "comment_id": 55538826, "created_at": "2022-01-01T17:00:00Z", "repository": "airbytehq/integration-test"},
    ]

    # Second sync: a third comment appears and comment 55538826 gains one more reaction.
    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments",
        json=[
            {"id": 55538825, "updated_at": "2021-01-01T15:00:00Z"},
            {"id": 55538826, "updated_at": "2021-01-01T16:00:00Z"},
            {"id": 55538827, "updated_at": "2022-02-01T15:00:00Z"},
        ],
    )

    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments/55538826/reactions",
        json=[
            {"id": 154935431, "created_at": "2022-01-01T17:00:00Z"},
            {"id": 154935432, "created_at": "2022-02-01T16:00:00Z"},
        ],
    )

    responses.add(
        "GET",
        "https://api.github.com/repos/airbytehq/integration-test/comments/55538827/reactions",
        json=[{"id": 154935433, "created_at": "2022-02-01T17:00:00Z"}],
    )

    records = read_incremental(stream, stream_state)

    # Only the reactions created after the stored per-comment cursors are expected.
    assert records == [
        {"id": 154935432, "comment_id": 55538826, "created_at": "2022-02-01T16:00:00Z", "repository": "airbytehq/integration-test"},
        {"id": 154935433, "comment_id": 55538827, "created_at": "2022-02-01T17:00:00Z", "repository": "airbytehq/integration-test"},
    ]
|
||||
|
||||
@@ -113,6 +113,7 @@ Your token should have at least the `repo` scope. Depending on which streams you
|
||||
|
||||
| Version | Date | Pull Request | Subject |
|
||||
|:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------|
|
||||
| 0.2.30 | 2022-05-09 | [12294](https://github.com/airbytehq/airbyte/pull/12294) | Add incremental support for streams `CommitCommentReactions`, `IssueCommentReactions`, `IssueReactions`, `PullRequestCommentReactions`, `Repositories`, `Workflows` |
|
||||
| 0.2.29 | 2022-05-04 | [\#12482](https://github.com/airbytehq/airbyte/pull/12482) | Update input configuration copy |
|
||||
| 0.2.28 | 2022-04-21 | [11893](https://github.com/airbytehq/airbyte/pull/11893) | Add new streams `TeamMembers`, `TeamMemberships` |
|
||||
| 0.2.27 | 2022-04-02 | [11678](https://github.com/airbytehq/airbyte/pull/11678) | Fix "PAT Credentials" in spec |
|
||||
|
||||
Reference in New Issue
Block a user