diff --git a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json index 2ec9e0beef4..7d3cdc98026 100644 --- a/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json +++ b/airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/ef69ef6e-aa7f-4af1-a01d-ef775033524e.json @@ -2,7 +2,7 @@ "sourceDefinitionId": "ef69ef6e-aa7f-4af1-a01d-ef775033524e", "name": "GitHub", "dockerRepository": "airbyte/source-github", - "dockerImageTag": "0.1.2", + "dockerImageTag": "0.1.3", "documentationUrl": "https://docs.airbyte.io/integrations/sources/github", "icon": "github.svg" } diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index d04cdf0fc1f..edeb6e86703 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -39,7 +39,7 @@ - sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e name: GitHub dockerRepository: airbyte/source-github - dockerImageTag: 0.1.2 + dockerImageTag: 0.1.3 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg - sourceDefinitionId: b5ea17b1-f170-46dc-bc31-cc744ca984c1 diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 849a14dcca1..46caf4830d4 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.1.2 +LABEL io.airbyte.version=0.1.3 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml index 8c98860b658..a505c34f073 100644 --- a/airbyte-integrations/connectors/source-github/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-github/acceptance-test-config.yml @@ -12,6 +12,7 @@ tests: basic_read: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/small_streams_configured_catalog.json" + timeout_seconds: 3600 # Below streams have too much records and because of it we are getting timeouts and hitting rate limits. # That's why we turned them off. # - config_path: "secrets/config.json" @@ -31,6 +32,7 @@ tests: full_refresh: - config_path: "secrets/config.json" configured_catalog_path: "integration_tests/small_streams_configured_catalog.json" + timeout_seconds: 3600 # Below streams have too much records and because of it we are getting timeouts and hitting rate limits. # That's why we turned them off. 
# - config_path: "secrets/config.json" diff --git a/airbyte-integrations/connectors/source-github/integration_tests/small_streams_configured_catalog.json b/airbyte-integrations/connectors/source-github/integration_tests/small_streams_configured_catalog.json index 306b265dc0e..975f5d0a7f7 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/small_streams_configured_catalog.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/small_streams_configured_catalog.json @@ -10,6 +10,19 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "comments", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "default_cursor_field": ["updated_at"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["updated_at"] + }, { "stream": { "name": "commit_comments", @@ -36,6 +49,19 @@ "destination_sync_mode": "append", "cursor_field": ["updated_at"] }, + { + "stream": { + "name": "stargazers", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "default_cursor_field": ["starred_at"], + "source_defined_primary_key": [["user_id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["starred_at"] + }, { "stream": { "name": "collaborators", @@ -46,6 +72,19 @@ "sync_mode": "full_refresh", "destination_sync_mode": "overwrite" }, + { + "stream": { + "name": "pull_requests", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "default_cursor_field": ["updated_at"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["updated_at"] + }, { "stream": { "name": "teams", @@ -69,6 +108,19 @@ "destination_sync_mode": "append", 
"cursor_field": ["updated_at"] }, + { + "stream": { + "name": "issues", + "json_schema": {}, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "default_cursor_field": ["updated_at"], + "source_defined_primary_key": [["id"]] + }, + "sync_mode": "incremental", + "destination_sync_mode": "append", + "cursor_field": ["updated_at"] + }, { "stream": { "name": "issue_labels", diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/comments.json b/airbyte-integrations/connectors/source-github/source_github/schemas/comments.json index 0ffd511e423..3ad898243f6 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/comments.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/comments.json @@ -11,6 +11,9 @@ "node_id": { "type": ["null", "string"] }, + "user": { + "$ref": "user.json" + }, "url": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/commit_comments.json b/airbyte-integrations/connectors/source-github/source_github/schemas/commit_comments.json index 9f8f2d32ab3..df7be687ffc 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/commit_comments.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/commit_comments.json @@ -32,8 +32,8 @@ "commit_id": { "type": ["null", "string"] }, - "user_id": { - "type": ["null", "integer"] + "user": { + "$ref": "user.json" }, "created_at": { "type": ["null", "string"], diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json b/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json index 5c22c402a7e..ca13ca5fb29 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/issues.json @@ -38,6 +38,9 @@ "title": { "type": ["null", "string"] 
}, + "user": { + "$ref": "user.json" + }, "body": { "type": ["null", "string"] }, diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json index df1aac61044..84f86677f87 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_requests.json @@ -53,8 +53,8 @@ "title": { "type": ["null", "string"] }, - "user_id": { - "type": ["null", "integer"] + "user": { + "$ref": "user.json" }, "body": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json b/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json index 60489ec6db1..b3bb2397f3c 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/reviews.json @@ -11,8 +11,8 @@ "node_id": { "type": ["null", "string"] }, - "user_id": { - "type": ["null", "integer"] + "user": { + "$ref": "user.json" }, "body": { "type": ["null", "string"] diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/shared/user.json b/airbyte-integrations/connectors/source-github/source_github/schemas/shared/user.json new file mode 100644 index 00000000000..c8d45aa2185 --- /dev/null +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/shared/user.json @@ -0,0 +1,59 @@ +{ + "type": ["null", "object"], + "properties": { + "login": { + "type": ["null", "string"] + }, + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "avatar_url": { + "type": ["null", "string"] + }, + "gravatar_id": { + "type": ["null", "string"] + }, + "url": { + "type": ["null", "string"] + }, + "html_url": { + "type": ["null", "string"] + }, + 
"followers_url": { + "type": ["null", "string"] + }, + "following_url": { + "type": ["null", "string"] + }, + "gists_url": { + "type": ["null", "string"] + }, + "starred_url": { + "type": ["null", "string"] + }, + "subscriptions_url": { + "type": ["null", "string"] + }, + "organizations_url": { + "type": ["null", "string"] + }, + "repos_url": { + "type": ["null", "string"] + }, + "events_url": { + "type": ["null", "string"] + }, + "received_events_url": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "site_admin": { + "type": ["null", "boolean"] + } + } +} diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/stargazers.json b/airbyte-integrations/connectors/source-github/source_github/schemas/stargazers.json index 15518b63cc1..fbd26bfe46d 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/stargazers.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/stargazers.json @@ -11,6 +11,9 @@ "starred_at": { "type": ["null", "string"], "format": "date-time" + }, + "user": { + "$ref": "user.json" } } } diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 86a7f6a0969..91d650032fd 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -280,8 +280,6 @@ class Reviews(GithubStream): API docs: https://docs.github.com/en/rest/reference/pulls#list-reviews-for-a-pull-request """ - fields_to_minimize = ("user",) - def path( self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None ) -> str: @@ -361,7 +359,6 @@ class PullRequests(SemiIncrementalGithubStream): page_size = 50 fields_to_minimize = ( - "user", "milestone", "assignee", "labels", @@ -390,7 +387,6 @@ class 
PullRequests(SemiIncrementalGithubStream): for nested in ("head", "base"): entry = record.get(nested, {}) - entry["user_id"] = (record.get("head", {}).pop("user", {}) or {}).get("id") entry["repo_id"] = (record.get("head", {}).pop("repo", {}) or {}).get("id") return record @@ -416,8 +412,6 @@ class CommitComments(SemiIncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/repos#list-commit-comments-for-a-repository """ - fields_to_minimize = ("user",) - def path(self, **kwargs) -> str: return f"repos/{self.repository}/comments" @@ -446,7 +440,6 @@ class Stargazers(SemiIncrementalGithubStream): primary_key = "user_id" cursor_field = "starred_at" - fields_to_minimize = ("user",) def request_headers(self, **kwargs) -> Mapping[str, Any]: base_headers = super().request_headers(**kwargs) @@ -456,6 +449,15 @@ class Stargazers(SemiIncrementalGithubStream): return {**base_headers, **headers} + def transform(self, record: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + """ + We need to provide the "user_id" for the primary_key attribute + and don't remove the whole "user" block from the record. + """ + record = super().transform(record=record) + record["user_id"] = (record.get("user") or {}).get("id") + return record + class Projects(SemiIncrementalGithubStream): """ @@ -499,7 +501,6 @@ class Comments(IncrementalGithubStream): API docs: https://docs.github.com/en/rest/reference/issues#list-issue-comments-for-a-repository """ - fields_to_minimize = ("user",) page_size = 30 # `comments` is a large stream so it's better to set smaller page size. def path(self, **kwargs) -> str: @@ -538,7 +539,6 @@ class Issues(IncrementalGithubStream): page_size = 50 # `issues` is a large stream so it's better to set smaller page size. 
fields_to_minimize = ( - "user", "assignee", "milestone", "labels", diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 06feb56da3c..30d6943d93d 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -41,7 +41,7 @@ meaning that they: Please, consider this behaviour when using those 8 incremental streams because it may affect you API call limits. -1. We are passing few parameters (`since`, `sort` and `direction`) to GitHub in order to filter records and sometimes +2. We are passing few parameters (`since`, `sort` and `direction`) to GitHub in order to filter records and sometimes for large streams specifying very distant `start_date` in the past may result in keep on getting error from GitHub instead of records (respective `WARN` log message will be outputted). In this case Specifying more recent `start_date` may help. @@ -83,6 +83,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | | :------ | :-------- | :----- | :------ | +| 0.1.3 | 2021-08-03 | [5156](https://github.com/airbytehq/airbyte/pull/5156) | Extended existing schemas with `user` property for certain streams | | 0.1.2 | 2021-07-13 | [4708](https://github.com/airbytehq/airbyte/pull/4708) | Fix bug with IssueEvents stream and add handling for rate limiting | | 0.1.1 | 2021-07-07 | [4590](https://github.com/airbytehq/airbyte/pull/4590) | Fix schema in the `pull_request` stream | | 0.1.0 | 2021-07-06 | [4174](https://github.com/airbytehq/airbyte/pull/4174) | New Source: GitHub |