diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/commits.json b/airbyte-integrations/connectors/source-github/source_github/schemas/commits.json index 46e32ab7a7f..dd1f6c27cc0 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/commits.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/commits.json @@ -96,11 +96,11 @@ } } }, - "author_id": { - "type": ["null", "integer"] + "author": { + "$ref": "user.json" }, - "committer_id": { - "type": ["null", "integer"] + "committer": { + "$ref": "user.json" }, "parents": { "type": ["null", "array"], diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/events.json b/airbyte-integrations/connectors/source-github/source_github/schemas/events.json index fca685e4802..79ac9cfc0cc 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/events.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/events.json @@ -15,14 +15,42 @@ "type": ["null", "object"], "properties": {} }, - "repo_id": { - "type": ["null", "integer"] + "repo": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "name": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + } + } }, - "actor_id": { - "type": ["null", "integer"] + "actor": { + "$ref": "user.json" }, - "org_id": { - "type": ["null", "integer"] + "org": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "login": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + } + } }, "created_at": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/issue_events.json b/airbyte-integrations/connectors/source-github/source_github/schemas/issue_events.json index 8a0bb573155..8d7e3184514 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/issue_events.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/issue_events.json @@ -14,8 +14,8 @@ "url": { "type": ["null", "string"] }, - "actor_id": { - "type": ["null", "integer"] + "actor": { + "$ref": "user.json" }, "event": { "type": ["null", "string"] @@ -30,8 +30,49 @@ "type": ["null", "string"], "format": "date-time" }, - "issue_id": { - "type": ["null", "integer"] + "issue": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "repository_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "comments_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "user": { + "$ref": "user.json" + } + } } } } diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/issue_milestones.json b/airbyte-integrations/connectors/source-github/source_github/schemas/issue_milestones.json index 6de25d9189f..9e0eed332b9 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/issue_milestones.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/issue_milestones.json @@ -32,8 +32,8 @@ "description": { "type": ["null", "string"] }, - "creator_id": { - "type": ["null", "integer"] + "creator": { + "$ref": "user.json" }, "open_issues": { "type": ["null", "integer"] diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json b/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json index a3680341fb6..bd723d579ab 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json @@ -50,20 +50,97 @@ "labels": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "default": { + "type": ["null", "boolean"] + } + } } }, - "assignee_id": { - "type": ["null", "integer"] + "assignee": { + "$ref": "user.json" }, "assignees": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "user.json" } }, - "milestone_id": { - "type": ["null", "integer"] + "milestone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "$ref": "user.json" + }, + "open_issues": { + "type": ["null", "integer"] + }, + "closed_issues": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" + } + } }, "locked": { "type": ["null", "boolean"] diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/projects.json b/airbyte-integrations/connectors/source-github/source_github/schemas/projects.json index 3bd9053de12..de1f384c3e8 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/projects.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/projects.json @@ -35,8 +35,8 @@ "state": { "type": ["null", "string"] }, - "creator_id": { - "type": ["null", "integer"] + "creator": { + "$ref": "user.json" }, "created_at": { "type": ["null", "string"], diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json index 9c5f16e7da5..f64a7ea03c4 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json @@ -62,11 +62,88 @@ "labels": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "default": { + "type": ["null", "boolean"] + } + } } }, - "milestone_id": { - "type": ["null", "integer"] + "milestone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "labels_url": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "number": { + "type": ["null", "integer"] + }, + "state": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "creator": { + "$ref": "user.json" + }, + "open_issues": { + "type": ["null", "integer"] + }, + "closed_issues": { + "type": ["null", "integer"] + }, + "created_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "updated_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "closed_at": { + "type": ["null", "string"], + "format": "date-time" + }, + "due_on": { + "type": ["null", "string"], + "format": "date-time" + } + } }, "active_lock_reason": { "type": ["null", "string"] @@ -90,25 +167,64 @@ "merge_commit_sha": { "type": ["null", "string"] }, - "assignee_id": { - "type": ["null", "integer"] + "assignee": { + "$ref": "user.json" }, "assignees": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "user.json" } }, "requested_reviewers": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "$ref": "user.json" } }, "requested_teams": { "type": ["null", "array"], "items": { - "type": ["null", "integer"] + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "privacy": { + "type": ["null", "string"] + }, + "permission": { + "type": ["null", "string"] + }, + "members_url": { + "type": ["null", "string"] + }, + "repositories_url": { + "type": ["null", "string"] + }, + "parent": { + "type": ["null", "object"], + "properties": {} + } + } } }, "head": { diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/releases.json b/airbyte-integrations/connectors/source-github/source_github/schemas/releases.json index e61814804e4..6c290db20f8 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/releases.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/releases.json @@ -55,8 +55,8 @@ "type": ["null", "string"], "format": "date-time" }, - "author_id": { - "type": ["null", "integer"] + "author": { + "$ref": "user.json" }, "assets": { "type": ["null", "array"], diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 7a8c50e1550..86e5df9c90d 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -73,11 +73,6 @@ class GithubStream(HttpStream, ABC): stream_base_params = {} - # Fields in below variable will be used for data clearing. Put there keys which represent: - # - objects `{}`, like `user`, `actor` etc. - # - lists `[]`, like `labels`, `assignees` etc. - fields_to_minimize = () - def __init__(self, repositories: List[str], **kwargs): super().__init__(**kwargs) self.repositories = repositories @@ -185,34 +180,6 @@ class GithubStream(HttpStream, ABC): yield self.transform(record=record, repository=stream_slice["repository"]) def transform(self, record: MutableMapping[str, Any], repository: str = None, organization: str = None) -> MutableMapping[str, Any]: - """ - Use this method to: - - remove excessive fields from record; - - minify subelements in the record. For example, if you have `reviews` record which looks like this: - { - "id": 671782869, - "node_id": "MDE3OlB1bGxSZXF1ZXN0UmV2aWV3NjcxNzgyODY5", - "user": { - "login": "keu", - "id": 1619536, - ... - }, - "body": "lgtm, just small comment", - ... - } - - `user` subelement contains almost all possible fields fo user and it's not optimal to store such data in - `reviews` record. We may leave only `user.id` field and save in to `user_id` field in the record. So if you - need to do something similar with your record you may use this method. - """ - for field in self.fields_to_minimize: - field_value = record.pop(field, None) - if field_value is None: - record[field] = field_value - elif isinstance(field_value, dict): - record[f"{field}_id"] = field_value.get("id") if field_value else None - elif isinstance(field_value, list): - record[field] = [value.get("id") for value in field_value] if repository: record["repository"] = repository if organization: @@ -473,7 +440,6 @@ class Releases(SemiIncrementalGithubStream): """ cursor_field = "created_at" - fields_to_minimize = ("author",) def transform(self, record: MutableMapping[str, Any], repository: str = None, **kwargs) -> MutableMapping[str, Any]: record = super().transform(record=record, repository=repository) @@ -492,11 +458,6 @@ class Events(SemiIncrementalGithubStream): """ cursor_field = "created_at" - fields_to_minimize = ( - "actor", - "repo", - "org", - ) class PullRequests(SemiIncrementalGithubStream): @@ -505,14 +466,6 @@ class PullRequests(SemiIncrementalGithubStream): """ page_size = 50 - fields_to_minimize = ( - "milestone", - "assignee", - "labels", - "assignees", - "requested_reviewers", - "requested_teams", - ) def __init__(self, **kwargs): super().__init__(**kwargs) @@ -568,7 +521,6 @@ class IssueMilestones(SemiIncrementalGithubStream): """ is_sorted_descending = True - fields_to_minimize = ("creator",) stream_base_params = { "state": "all", "sort": "updated", @@ -610,7 +562,6 @@ class Projects(SemiIncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/projects#list-repository-projects """ - fields_to_minimize = ("creator",) stream_base_params = { "state": "all", } @@ -630,10 +581,6 @@ class IssueEvents(SemiIncrementalGithubStream): """ cursor_field = "created_at" - fields_to_minimize = ( - "actor", - "issue", - ) def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/issues/events" @@ -660,10 +607,6 @@ class Commits(IncrementalGithubStream): primary_key = "sha" cursor_field = "created_at" - fields_to_minimize = ( - "author", - "committer", - ) def transform(self, record: MutableMapping[str, Any], repository: str = None, **kwargs) -> MutableMapping[str, Any]: record = super().transform(record=record, repository=repository) @@ -684,12 +627,6 @@ class Issues(IncrementalGithubStream): page_size = 50 # `issues` is a large stream so it's better to set smaller page size. - fields_to_minimize = ( - "assignee", - "milestone", - "labels", - "assignees", - ) stream_base_params = { "state": "all", "sort": "updated",