* Source GitHub: define pull_request_stats and reviews streams as pull_request substreams
* Source GitHub: Bump version to 0.2.6
* Source GitHub: Apply PR review suggestions

Co-authored-by: Jérémy Lourenço <jeremy@lourenco.io>
This commit is contained in:
@@ -12,5 +12,5 @@ RUN pip install .
|
||||
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
|
||||
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
|
||||
|
||||
LABEL io.airbyte.version=0.2.5
|
||||
LABEL io.airbyte.version=0.2.6
|
||||
LABEL io.airbyte.name=airbyte/source-github
|
||||
|
||||
@@ -153,37 +153,40 @@ class SourceGithub(AbstractSource):
|
||||
repositories = repos + organization_repos
|
||||
|
||||
organizations = list({org.split("/")[0] for org in repositories})
|
||||
full_refresh_args = {"authenticator": authenticator, "repositories": repositories}
|
||||
incremental_args = {**full_refresh_args, "start_date": config["start_date"]}
|
||||
|
||||
organization_args = {"authenticator": authenticator, "organizations": organizations}
|
||||
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", ""), full_refresh_args)
|
||||
repository_args = {"authenticator": authenticator, "repositories": repositories}
|
||||
repository_args_with_start_date = {**repository_args, "start_date": config["start_date"]}
|
||||
|
||||
default_branches, branches_to_pull = self._get_branches_data(config.get("branch", ""), repository_args)
|
||||
pull_requests_stream = PullRequests(**repository_args_with_start_date)
|
||||
|
||||
return [
|
||||
Assignees(**full_refresh_args),
|
||||
Branches(**full_refresh_args),
|
||||
Collaborators(**full_refresh_args),
|
||||
Comments(**incremental_args),
|
||||
CommitCommentReactions(**incremental_args),
|
||||
CommitComments(**incremental_args),
|
||||
Commits(**incremental_args, branches_to_pull=branches_to_pull, default_branches=default_branches),
|
||||
Events(**incremental_args),
|
||||
IssueCommentReactions(**incremental_args),
|
||||
IssueEvents(**incremental_args),
|
||||
IssueLabels(**full_refresh_args),
|
||||
IssueMilestones(**incremental_args),
|
||||
IssueReactions(**incremental_args),
|
||||
Issues(**incremental_args),
|
||||
Assignees(**repository_args),
|
||||
Branches(**repository_args),
|
||||
Collaborators(**repository_args),
|
||||
Comments(**repository_args_with_start_date),
|
||||
CommitCommentReactions(**repository_args_with_start_date),
|
||||
CommitComments(**repository_args_with_start_date),
|
||||
Commits(**repository_args_with_start_date, branches_to_pull=branches_to_pull, default_branches=default_branches),
|
||||
Events(**repository_args_with_start_date),
|
||||
IssueCommentReactions(**repository_args_with_start_date),
|
||||
IssueEvents(**repository_args_with_start_date),
|
||||
IssueLabels(**repository_args),
|
||||
IssueMilestones(**repository_args_with_start_date),
|
||||
IssueReactions(**repository_args_with_start_date),
|
||||
Issues(**repository_args_with_start_date),
|
||||
Organizations(**organization_args),
|
||||
Projects(**incremental_args),
|
||||
PullRequestCommentReactions(**incremental_args),
|
||||
PullRequestStats(**full_refresh_args),
|
||||
PullRequests(**incremental_args),
|
||||
Releases(**incremental_args),
|
||||
Projects(**repository_args_with_start_date),
|
||||
PullRequestCommentReactions(**repository_args_with_start_date),
|
||||
PullRequestStats(parent=pull_requests_stream, **repository_args),
|
||||
PullRequests(**repository_args_with_start_date),
|
||||
Releases(**repository_args_with_start_date),
|
||||
Repositories(**organization_args),
|
||||
ReviewComments(**incremental_args),
|
||||
Reviews(**full_refresh_args),
|
||||
Stargazers(**incremental_args),
|
||||
Tags(**full_refresh_args),
|
||||
ReviewComments(**repository_args_with_start_date),
|
||||
Reviews(parent=pull_requests_stream, **repository_args),
|
||||
Stargazers(**repository_args_with_start_date),
|
||||
Tags(**repository_args),
|
||||
Teams(**organization_args),
|
||||
Users(**organization_args),
|
||||
]
|
||||
|
||||
@@ -12,7 +12,7 @@ from urllib import parse
|
||||
import requests
|
||||
import vcr
|
||||
from airbyte_cdk.models import SyncMode
|
||||
from airbyte_cdk.sources.streams.http import HttpStream
|
||||
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
|
||||
from requests.exceptions import HTTPError
|
||||
from vcr.cassette import Cassette
|
||||
|
||||
@@ -187,7 +187,6 @@ class GithubStream(HttpStream, ABC):
|
||||
|
||||
return record
|
||||
|
||||
|
||||
class SemiIncrementalGithubStream(GithubStream):
|
||||
"""
|
||||
Semi incremental streams are also incremental but with one difference, they:
|
||||
@@ -286,55 +285,6 @@ class Assignees(GithubStream):
|
||||
"""
|
||||
|
||||
|
||||
class PullRequestStats(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#get-a-pull-request

    Requests every pull request individually and keeps only the fields that are
    declared in this stream's JSON schema.
    """

    top_level_stream = False

    @property
    def record_keys(self) -> List[str]:
        """Return the property names declared in this stream's JSON schema."""
        return list(self.get_json_schema()["properties"].keys())

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the endpoint of the single pull request identified by ``stream_slice``."""
        return f"repos/{stream_slice['repository']}/pulls/{stream_slice['pull_request_number']}"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one slice (pull request number + repository) per pull request of each parent-class slice."""
        for stream_slice in super().stream_slices(**kwargs):
            # A dedicated PullRequests stream is built for the repository of the current slice.
            pull_requests_stream = PullRequests(authenticator=self.authenticator, repositories=[stream_slice["repository"]], start_date="")
            for pull_request in pull_requests_stream.read_records(sync_mode=SyncMode.full_refresh, stream_slice=stream_slice):
                yield {"pull_request_number": pull_request["number"], "repository": stream_slice["repository"]}

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Yield the single transformed record contained in ``response``."""
        yield self.transform(response.json(), repository=stream_slice["repository"])

    def transform(self, record: MutableMapping[str, Any], repository: str = None) -> MutableMapping[str, Any]:
        """Apply the parent transform, then drop every field that is not declared in the JSON schema."""
        record = super().transform(record=record, repository=repository)
        # `record_keys` calls get_json_schema() on every access; resolve it once per
        # record instead of once per key inside the comprehension.
        allowed_keys = set(self.record_keys)
        return {key: value for key, value in record.items() if key in allowed_keys}
|
||||
|
||||
|
||||
class Reviews(GithubStream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request

    Lists the reviews of every pull request, one request path per pull request.
    """

    top_level_stream = False

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the reviews endpoint of the pull request identified by ``stream_slice``."""
        repository = stream_slice["repository"]
        number = stream_slice["pull_request_number"]
        return f"repos/{repository}/pulls/{number}/reviews"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one slice (pull request number + repository) per pull request of each parent-class slice."""
        for repo_slice in super().stream_slices(**kwargs):
            # A dedicated PullRequests stream is built for the repository of the current slice.
            pulls = PullRequests(authenticator=self.authenticator, repositories=[repo_slice["repository"]], start_date="")
            pull_records = pulls.read_records(sync_mode=SyncMode.full_refresh, stream_slice=repo_slice)
            for pr in pull_records:
                yield {"pull_request_number": pr["number"], "repository": repo_slice["repository"]}
|
||||
|
||||
|
||||
class Branches(GithubStream):
|
||||
"""
|
||||
API docs: https://docs.github.com/en/rest/reference/repos#list-branches
|
||||
@@ -727,6 +677,59 @@ class ReviewComments(IncrementalGithubStream):
|
||||
return f"repos/{stream_slice['repository']}/pulls/comments"
|
||||
|
||||
|
||||
# Pull request substreams
|
||||
|
||||
|
||||
class PullRequestSubstream(HttpSubStream, GithubStream, ABC):
    """
    Base class for streams that are read once per pull request of a parent stream.

    Slices come from the superclass; each one is expected to hold a pull request
    record under the ``parent`` key, from which the pull request number and the
    repository are taken.
    """

    top_level_stream = False

    def __init__(self, parent: PullRequests, **kwargs):
        """Forward ``parent`` and all remaining keyword arguments to the superclass initialisers."""
        super().__init__(parent=parent, **kwargs)

    def stream_slices(
        self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one slice (pull request number + repository) per slice produced by the superclass."""
        for upstream_slice in super().stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=stream_state):
            pull_request = upstream_slice["parent"]
            yield {"pull_request_number": pull_request["number"], "repository": pull_request["repository"]}
|
||||
|
||||
|
||||
class PullRequestStats(PullRequestSubstream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#get-a-pull-request

    Requests every pull request individually and keeps only the fields that are
    declared in this stream's JSON schema.
    """

    @property
    def record_keys(self) -> List[str]:
        """Return the property names declared in this stream's JSON schema."""
        return list(self.get_json_schema()["properties"].keys())

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the endpoint of the single pull request identified by ``stream_slice``."""
        return f"repos/{stream_slice['repository']}/pulls/{stream_slice['pull_request_number']}"

    def parse_response(self, response: requests.Response, stream_slice: Mapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Yield the single transformed record contained in ``response``."""
        yield self.transform(response.json(), repository=stream_slice["repository"])

    def transform(self, record: MutableMapping[str, Any], repository: str = None) -> MutableMapping[str, Any]:
        """Apply the parent transform, then drop every field that is not declared in the JSON schema."""
        record = super().transform(record=record, repository=repository)
        # `record_keys` calls get_json_schema() on every access; resolve it once per
        # record instead of once per key inside the comprehension.
        allowed_keys = set(self.record_keys)
        return {key: value for key, value in record.items() if key in allowed_keys}
|
||||
|
||||
|
||||
class Reviews(PullRequestSubstream):
    """
    API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request

    Lists the reviews of the pull request identified by each stream slice.
    """

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the reviews endpoint of the pull request identified by ``stream_slice``."""
        repository = stream_slice["repository"]
        number = stream_slice["pull_request_number"]
        return f"repos/{repository}/pulls/{number}/reviews"
|
||||
|
||||
|
||||
# Reactions streams
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user