1
0
mirror synced 2025-12-25 02:09:19 -05:00

Google Sheets: add row_id to rows and use as primary key (#19215)

* source-google-sheets: add row_id to rows and use as primary key

* Update Dockerfile

* Update google-sheets.md

* Update Dockerfile

* Update google-sheets.md

* auto-bump connector version

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Co-authored-by: Octavia Squidington III <octavia-squidington-iii@users.noreply.github.com>
This commit is contained in:
Mahdi Dibaiee
2022-11-14 18:42:00 +00:00
committed by GitHub
parent d59275d5f9
commit 44ac470d28
7 changed files with 43 additions and 14 deletions

View File

@@ -27,14 +27,18 @@ class TestHelpers(unittest.TestCase):
sheet_name = "sheet1"
header_values = ["h1", "h2", "h3"]
props = {header: {"type": "string"} for header in header_values}
props["row_id"] = {"type": "integer"}
expected_stream = AirbyteStream(
name=sheet_name,
json_schema={
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["row_id"],
# For simplicity, the type of every cell is a string
"properties": {header: {"type": "string"} for header in header_values},
"properties": props,
},
source_defined_primary_key=[["row_id"]],
supported_sync_modes=[SyncMode.full_refresh],
)
@@ -57,15 +61,20 @@ class TestHelpers(unittest.TestCase):
header_values = ["h1", "h1", "h3"]
# h1 is ignored because it is duplicate
expected_stream_header_values = ["h3"]
props = {
"h3": {"type": "string"},
"row_id": {"type": "integer"},
}
expected_stream = AirbyteStream(
name=sheet_name,
json_schema={
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["row_id"],
# For simplicity, the type of every cell is a string
"properties": {header: {"type": "string"} for header in expected_stream_header_values},
"properties": props,
},
source_defined_primary_key=[["row_id"]],
supported_sync_modes=[SyncMode.full_refresh],
)
@@ -81,9 +90,11 @@ class TestHelpers(unittest.TestCase):
json_schema={
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["row_id"],
# For simplicity, the type of every cell is a string
"properties": {"h1": {"type": "string"}},
"properties": {"h1": {"type": "string"}, "row_id": {"type": "integer"}},
},
source_defined_primary_key=[["row_id"]],
supported_sync_modes=[SyncMode.full_refresh],
)
actual_stream = Helpers.headers_to_airbyte_stream(logger, sheet_name, header_values)
@@ -143,10 +154,11 @@ class TestHelpers(unittest.TestCase):
sheet = "my_sheet"
cell_values = ["v1", "v2", "v3", "v4"]
column_index_to_name = {0: "c1", 3: "c4"}
row_id = 1
actual = Helpers.row_data_to_record_message(sheet, cell_values, column_index_to_name)
actual = Helpers.row_data_to_record_message(sheet, row_id, cell_values, column_index_to_name)
expected = AirbyteRecordMessage(stream=sheet, data={"c1": "v1", "c4": "v4"}, emitted_at=1)
expected = AirbyteRecordMessage(stream=sheet, data={"row_id": row_id, "c1": "v1", "c4": "v4"}, emitted_at=1)
self.assertEqual(expected.stream, actual.stream)
self.assertEqual(expected.data, actual.data)