1
0
mirror of synced 2025-12-25 02:09:19 -05:00
* Source GitHub: define pull_request_stats and reviews streams as pull_request substreams
* Source GitHub: Bump version to 0.2.6
* Source GitHub: Apply PR review suggestions
Co-authored-by: Jérémy Lourenço <jeremy@lourenco.io>
This commit is contained in:
Yevhenii
2021-11-24 16:56:25 +02:00
committed by GitHub
parent b28f20bb53
commit 96581f12af
6 changed files with 87 additions and 80 deletions

View File

@@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
LABEL io.airbyte.version=0.2.5
LABEL io.airbyte.version=0.2.6
LABEL io.airbyte.name=airbyte/source-github

View File

@@ -153,37 +153,40 @@ class SourceGithub(AbstractSource):
repositories = repos + organization_repos
organizations = list({org.split("/")[0] for org in repositories})
full_refresh_args = {"authenticator": authenticator, "repositories": repositories}
incremental_args = {**full_refresh_args, "start_date": config["start_date"]}
organization_args = {"authenticator": authenticator, "organizations": organizations}
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", ""), full_refresh_args)
repository_args = {"authenticator": authenticator, "repositories": repositories}
repository_args_with_start_date = {**repository_args, "start_date": config["start_date"]}
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", ""), repository_args)
pull_requests_stream = PullRequests(**repository_args_with_start_date)
return [
Assignees(**full_refresh_args),
Branches(**full_refresh_args),
Collaborators(**full_refresh_args),
Comments(**incremental_args),
CommitCommentReactions(**incremental_args),
CommitComments(**incremental_args),
Commits(**incremental_args, branches_to_pull=branches_to_pull, default_branches=default_branches),
Events(**incremental_args),
IssueCommentReactions(**incremental_args),
IssueEvents(**incremental_args),
IssueLabels(**full_refresh_args),
IssueMilestones(**incremental_args),
IssueReactions(**incremental_args),
Issues(**incremental_args),
Assignees(**repository_args),
Branches(**repository_args),
Collaborators(**repository_args),
Comments(**repository_args_with_start_date),
CommitCommentReactions(**repository_args_with_start_date),
CommitComments(**repository_args_with_start_date),
Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches),
Events(**repository_args_with_start_date),
IssueCommentReactions(**repository_args_with_start_date),
IssueEvents(**repository_args_with_start_date),
IssueLabels(**repository_args),
IssueMilestones(**repository_args_with_start_date),
IssueReactions(**repository_args_with_start_date),
Issues(**repository_args_with_start_date),
Organizations(**organization_args),
Projects(**incremental_args),
PullRequestCommentReactions(**incremental_args),
PullRequestStats(**full_refresh_args),
PullRequests(**incremental_args),
Releases(**incremental_args),
Projects(**repository_args_with_start_date),
PullRequestCommentReactions(**repository_args_with_start_date),
PullRequestStats(parent=pull_requests_stream, **repository_args),
PullRequests(**repository_args_with_start_date),
Releases(**repository_args_with_start_date),
Repositories(**organization_args),
ReviewComments(**incremental_args),
Reviews(**full_refresh_args),
Stargazers(**incremental_args),
Tags(**full_refresh_args),
ReviewComments(**repository_args_with_start_date),
Reviews(parent=pull_requests_stream, **repository_args),
Stargazers(**repository_args_with_start_date),
Tags(**repository_args),
Teams(**organization_args),
Users(**organization_args),
]

View File

@@ -12,7 +12,7 @@ from urllib import parse
import requests
import vcr
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
from requests.exceptions import HTTPError
from vcr.cassette import Cassette
@@ -187,7 +187,6 @@ class GithubStream(HttpStream, ABC):
return record
class SemiIncrementalGithubStream(GithubStream):
"""
Semi incremental streams are also incremental but with one difference, they:
@@ -286,55 +285,6 @@ class Assignees(GithubStream):
"""
class PullRequestStats(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#get-a-pull-request

    Requests every pull request individually and keeps only the fields that are
    declared in this stream's JSON schema.
    """

    top_level_stream = False

    @property
    def record_keys(self) -> List[str]:
        """Names of the properties declared in this stream's JSON schema."""
        return list(self.get_json_schema()["properties"].keys())

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the endpoint path of the pull request identified by ``stream_slice``."""
        return f"repos/{stream_slice['repository']}/pulls/{stream_slice['pull_request_number']}"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield one slice per pull request.

        For every slice produced by the parent class, a ``PullRequests`` stream limited
        to that slice's repository is read in full-refresh mode; each record it returns
        becomes a ``{"pull_request_number", "repository"}`` slice.
        """
        for stream_slice in super().stream_slices(**kwargs):
            pull_requests_stream = PullRequests(authenticator=self.authenticator, repositories=[stream_slice["repository"]], start_date="")
            for pull_request in pull_requests_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
                yield {"pull_request_number": pull_request["number"], "repository": stream_slice["repository"]}

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Yield the JSON body of ``response`` as a single record, after passing it through ``transform``."""
        yield self.transform(response.json(), repository=stream_slice["repository"])

    def transform(self, record: MutableMapping[str, Any], repository: str = None) -> MutableMapping[str, Any]:
        """Apply the parent transformation, then drop every key that is not declared in the JSON schema."""
        record = super().transform(record=record, repository=repository)
        # Evaluate the property exactly once per record. Referencing ``self.record_keys``
        # in the comprehension condition would call get_json_schema() again for every key.
        allowed_keys = set(self.record_keys)
        return {key: value for key, value in record.items() if key in allowed_keys}
class Reviews(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request
    """

    top_level_stream = False

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the reviews endpoint path of the pull request identified by ``stream_slice``."""
        repository = stream_slice["repository"]
        pull_request_number = stream_slice["pull_request_number"]
        return f"repos/{repository}/pulls/{pull_request_number}/reviews"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Yield one ``{"pull_request_number", "repository"}`` slice per pull request.

        Pull requests are obtained by reading a ``PullRequests`` stream, restricted to the
        repository of each slice produced by the parent class, in full-refresh mode.
        """
        for repository_slice in super().stream_slices(**kwargs):
            repository = repository_slice["repository"]
            pull_requests = PullRequests(authenticator=self.authenticator, repositories=[repository], start_date="")
            records = pull_requests.read_records(sync_mode=SyncMode.full_refresh, stream_slice=repository_slice)
            for pull_request in records:
                yield {"pull_request_number": pull_request["number"], "repository": repository}
class Branches(GithubStream):
"""
API docs: https://docs.github.com/en/rest/reference/repos#list-branches
@@ -727,6 +677,59 @@ class ReviewComments(IncrementalGithubStream):
return f"repos/{stream_slice['repository']}/pulls/comments"
# Pull request substreams
class PullRequestSubstream(HttpSubStream, GithubStream, ABC):
    """
    Base class for streams that are sliced per pull request of a parent ``PullRequests`` stream.
    """

    top_level_stream = False

    def __init__(self, parent: PullRequests, **kwargs):
        """Forward ``parent`` and every other keyword argument to the base-class initializers."""
        super().__init__(parent=parent, **kwargs)

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Convert the slices produced by the inherited ``stream_slices`` into pull-request slices.

        Each inherited slice is expected to hold the parent item under the ``"parent"`` key;
        its ``"number"`` and ``"repository"`` values are re-emitted as
        ``{"pull_request_number": ..., "repository": ...}``.
        """
        inherited_slices = super().stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=stream_state)
        for inherited_slice in inherited_slices:
            pull_request = inherited_slice["parent"]
            yield {"pull_request_number": pull_request["number"], "repository": pull_request["repository"]}
class PullRequestStats(PullRequestSubstream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#get-a-pull-request

    Requests every pull request individually and keeps only the fields that are
    declared in this stream's JSON schema.
    """

    @property
    def record_keys(self) -> List[str]:
        """Names of the properties declared in this stream's JSON schema."""
        return list(self.get_json_schema()["properties"].keys())

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the endpoint path of the pull request identified by ``stream_slice``."""
        return f"repos/{stream_slice['repository']}/pulls/{stream_slice['pull_request_number']}"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Yield the JSON body of ``response`` as a single record, after passing it through ``transform``."""
        yield self.transform(response.json(), repository=stream_slice["repository"])

    def transform(self, record: MutableMapping[str, Any], repository: str = None) -> MutableMapping[str, Any]:
        """Apply the parent transformation, then drop every key that is not declared in the JSON schema."""
        record = super().transform(record=record, repository=repository)
        # Evaluate the property exactly once per record. Referencing ``self.record_keys``
        # in the comprehension condition would call get_json_schema() again for every key.
        allowed_keys = set(self.record_keys)
        return {key: value for key, value in record.items() if key in allowed_keys}
class Reviews(PullRequestSubstream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request
    """

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the reviews endpoint path of the pull request identified by ``stream_slice``."""
        repository = stream_slice["repository"]
        pull_request_number = stream_slice["pull_request_number"]
        return f"repos/{repository}/pulls/{pull_request_number}/reviews"
# Reactions streams