1
0
mirror of synced 2025-12-20 10:32:35 -05:00

Source GitHub: add stream ProjectsV2 (#30731)

Co-authored-by: artem1205 <artem1205@users.noreply.github.com>
This commit is contained in:
Artem Inzhyyants
2023-09-28 08:46:29 +02:00
committed by GitHub
parent 0537dde7d2
commit 61a63ec2ff
11 changed files with 9507 additions and 2518 deletions

View File

@@ -16,7 +16,14 @@ from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException
from requests.exceptions import HTTPError
from . import constants
from .graphql import CursorStorage, QueryReactions, get_query_issue_reactions, get_query_pull_requests, get_query_reviews
from .graphql import (
CursorStorage,
QueryReactions,
get_query_issue_reactions,
get_query_projectsV2,
get_query_pull_requests,
get_query_reviews,
)
from .utils import getter
@@ -127,6 +134,14 @@ class GithubStreamABC(HttpStream, ABC):
if reset_time:
return max(float(reset_time) - time.time(), min_backoff_time)
def check_graphql_rate_limited(self, response_json) -> bool:
errors = response_json.get("errors")
if errors:
for error in errors:
if error.get("type") == "RATE_LIMITED":
return True
return False
def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]:
# get out the stream_slice parts for later use.
organisation = stream_slice.get("organization", "")
@@ -204,14 +219,6 @@ class GithubStream(GithubStreamABC):
for repository in self.repositories:
yield {"repository": repository}
def check_graphql_rate_limited(self, response_json) -> bool:
errors = response_json.get("errors")
if errors:
for error in errors:
if error.get("type") == "RATE_LIMITED":
return True
return False
def get_error_display_message(self, exception: BaseException) -> Optional[str]:
if (
isinstance(exception, DefaultBackoffException)
@@ -716,12 +723,8 @@ class ReviewComments(IncrementalMixin, GithubStream):
return f"repos/{stream_slice['repository']}/pulls/comments"
class PullRequestStats(SemiIncrementalMixin, GithubStream):
"""
API docs: https://docs.github.com/en/graphql/reference/objects#pullrequest
"""
class GitHubGraphQLStream(GithubStream, ABC):
is_sorted = "asc"
http_method = "POST"
def path(
@@ -729,15 +732,27 @@ class PullRequestStats(SemiIncrementalMixin, GithubStream):
) -> str:
return "graphql"
def raise_error_from_response(self, response_json):
if "errors" in response_json:
raise Exception(str(response_json["errors"]))
def should_retry(self, response: requests.Response) -> bool:
return True if response.json().get("errors") else super().should_retry(response)
def _get_name(self, repository):
def _get_repository_name(self, repository: Mapping[str, Any]) -> str:
return repository["owner"]["login"] + "/" + repository["name"]
def request_params(
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> MutableMapping[str, Any]:
return {}
class PullRequestStats(SemiIncrementalMixin, GitHubGraphQLStream):
"""
API docs: https://docs.github.com/en/graphql/reference/objects#pullrequest
"""
large_stream = True
is_sorted = "asc"
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
self.raise_error_from_response(response_json=response.json())
repository = response.json()["data"]["repository"]
if repository:
nodes = repository["pullRequests"]["nodes"]
@@ -745,7 +760,7 @@ class PullRequestStats(SemiIncrementalMixin, GithubStream):
record["review_comments"] = sum([node["comments"]["totalCount"] for node in record["review_comments"]["nodes"]])
record["comments"] = record["comments"]["totalCount"]
record["commits"] = record["commits"]["totalCount"]
record["repository"] = self._get_name(repository)
record["repository"] = self._get_repository_name(repository)
if record["merged_by"]:
record["merged_by"]["type"] = record["merged_by"].pop("__typename")
yield record
@@ -757,11 +772,6 @@ class PullRequestStats(SemiIncrementalMixin, GithubStream):
if pageInfo["hasNextPage"]:
return {"after": pageInfo["endCursor"]}
def request_params(
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> MutableMapping[str, Any]:
return {}
def request_body_json(
self,
stream_state: Mapping[str, Any],
@@ -783,13 +793,12 @@ class PullRequestStats(SemiIncrementalMixin, GithubStream):
return {**base_headers, **headers}
class Reviews(SemiIncrementalMixin, GithubStream):
class Reviews(SemiIncrementalMixin, GitHubGraphQLStream):
"""
API docs: https://docs.github.com/en/graphql/reference/objects#pullrequestreview
"""
is_sorted = False
http_method = "POST"
cursor_field = "updated_at"
def __init__(self, **kwargs):
@@ -797,20 +806,6 @@ class Reviews(SemiIncrementalMixin, GithubStream):
self.pull_requests_cursor = {}
self.reviews_cursors = {}
def path(
self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> str:
return "graphql"
def request_params(
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> MutableMapping[str, Any]:
return {}
def raise_error_from_response(self, response_json):
if "errors" in response_json:
raise Exception(str(response_json["errors"]))
def _get_records(self, pull_request, repository_name):
"yield review records from pull_request"
for record in pull_request["reviews"]["nodes"]:
@@ -827,14 +822,10 @@ class Reviews(SemiIncrementalMixin, GithubStream):
}
yield record
def _get_name(self, repository):
return repository["owner"]["login"] + "/" + repository["name"]
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
self.raise_error_from_response(response_json=response.json())
repository = response.json()["data"]["repository"]
if repository:
repository_name = self._get_name(repository)
repository_name = self._get_repository_name(repository)
if "pullRequests" in repository:
for pull_request in repository["pullRequests"]["nodes"]:
yield from self._get_records(pull_request, repository_name)
@@ -844,7 +835,7 @@ class Reviews(SemiIncrementalMixin, GithubStream):
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
repository = response.json()["data"]["repository"]
if repository:
repository_name = self._get_name(repository)
repository_name = self._get_repository_name(repository)
reviews_cursors = self.reviews_cursors.setdefault(repository_name, {})
if "pullRequests" in repository:
if repository["pullRequests"]["pageInfo"]["hasNextPage"]:
@@ -909,6 +900,44 @@ class PullRequestCommits(GithubStream):
return record
class ProjectsV2(SemiIncrementalMixin, GitHubGraphQLStream):
"""
API docs: https://docs.github.com/en/graphql/reference/objects#pullrequest
"""
is_sorted = "asc"
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
repository = response.json()["data"]["repository"]
if repository:
nodes = repository["projectsV2"]["nodes"]
for record in nodes:
record["owner_id"] = record.pop("owner").get("id")
record["repository"] = self._get_repository_name(repository)
yield record
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
repository = response.json()["data"]["repository"]
if repository:
page_info = repository["projectsV2"]["pageInfo"]
if page_info["hasNextPage"]:
return {"after": page_info["endCursor"]}
def request_body_json(
self,
stream_state: Mapping[str, Any],
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> Optional[Mapping]:
organization, name = stream_slice["repository"].split("/")
if next_page_token:
next_page_token = next_page_token["after"]
query = get_query_projectsV2(
owner=organization, name=name, first=self.page_size, after=next_page_token, direction=self.is_sorted.upper()
)
return {"query": query}
# Reactions streams
@@ -995,13 +1024,12 @@ class IssueCommentReactions(ReactionStream):
parent_entity = Comments
class IssueReactions(SemiIncrementalMixin, GithubStream):
class IssueReactions(SemiIncrementalMixin, GitHubGraphQLStream):
"""
https://docs.github.com/en/graphql/reference/objects#issue
https://docs.github.com/en/graphql/reference/objects#reaction
"""
http_method = "POST"
cursor_field = "created_at"
def __init__(self, **kwargs):
@@ -1009,18 +1037,6 @@ class IssueReactions(SemiIncrementalMixin, GithubStream):
self.issues_cursor = {}
self.reactions_cursors = {}
def path(
self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> str:
return "graphql"
def raise_error_from_response(self, response_json):
if "errors" in response_json:
raise Exception(str(response_json["errors"]))
def _get_name(self, repository):
return repository["owner"]["login"] + "/" + repository["name"]
def _get_reactions_from_issue(self, issue, repository_name):
for reaction in issue["reactions"]["nodes"]:
reaction["repository"] = repository_name
@@ -1029,10 +1045,9 @@ class IssueReactions(SemiIncrementalMixin, GithubStream):
yield reaction
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
self.raise_error_from_response(response_json=response.json())
repository = response.json()["data"]["repository"]
if repository:
repository_name = self._get_name(repository)
repository_name = self._get_repository_name(repository)
if "issues" in repository:
for issue in repository["issues"]["nodes"]:
yield from self._get_reactions_from_issue(issue, repository_name)
@@ -1042,7 +1057,7 @@ class IssueReactions(SemiIncrementalMixin, GithubStream):
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
repository = response.json()["data"]["repository"]
if repository:
repository_name = self._get_name(repository)
repository_name = self._get_repository_name(repository)
reactions_cursors = self.reactions_cursors.setdefault(repository_name, {})
if "issues" in repository:
if repository["issues"]["pageInfo"]["hasNextPage"]:
@@ -1074,14 +1089,13 @@ class IssueReactions(SemiIncrementalMixin, GithubStream):
return {"query": query}
class PullRequestCommentReactions(SemiIncrementalMixin, GithubStream):
class PullRequestCommentReactions(SemiIncrementalMixin, GitHubGraphQLStream):
"""
API docs:
https://docs.github.com/en/graphql/reference/objects#pullrequestreviewcomment
https://docs.github.com/en/graphql/reference/objects#reaction
"""
http_method = "POST"
cursor_field = "created_at"
def __init__(self, **kwargs):
@@ -1089,21 +1103,9 @@ class PullRequestCommentReactions(SemiIncrementalMixin, GithubStream):
self.cursor_storage = CursorStorage(["PullRequest", "PullRequestReview", "PullRequestReviewComment", "Reaction"])
self.query_reactions = QueryReactions()
def path(
self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> str:
return "graphql"
def raise_error_from_response(self, response_json):
if "errors" in response_json:
raise Exception(str(response_json["errors"]))
def _get_name(self, repository):
return repository["owner"]["login"] + "/" + repository["name"]
def _get_reactions_from_comment(self, comment, repository):
for reaction in comment["reactions"]["nodes"]:
reaction["repository"] = self._get_name(repository)
reaction["repository"] = self._get_repository_name(repository)
reaction["comment_id"] = comment["id"]
if reaction["user"]:
reaction["user"]["type"] = "User"
@@ -1122,7 +1124,6 @@ class PullRequestCommentReactions(SemiIncrementalMixin, GithubStream):
yield from self._get_reactions_from_pull_request(pull_request, repository)
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
self.raise_error_from_response(response_json=response.json())
data = response.json()["data"]
repository = data.get("repository")
if repository:
@@ -1180,11 +1181,6 @@ class PullRequestCommentReactions(SemiIncrementalMixin, GithubStream):
link_to_object[link], pageInfo["endCursor"], node[link]["totalCount"], parent_id=node.get("node_id")
)
def request_params(
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
) -> MutableMapping[str, Any]:
return {}
def request_body_json(
self,
stream_state: Mapping[str, Any],