1
0
mirror of synced 2026-01-08 12:03:02 -05:00

Populate the PK from the Singer discovery run (#2713) (#4789)

When running Singer discovery, use the `key_properties` field to populate the `source_defined_primary_key` stream meta.
This commit is contained in:
Artjoms Iskovs
2021-08-09 18:11:22 +01:00
committed by GitHub
parent 219877f778
commit f790fee57c
6 changed files with 194 additions and 16 deletions

View File

@@ -124,11 +124,14 @@ class SingerHelper:
field_object["type"] = SingerHelper._parse_type(field_object["type"])
@staticmethod
def singer_catalog_to_airbyte_catalog(singer_catalog: Dict[str, any], sync_mode_overrides: Dict[str, SyncModeInfo]) -> AirbyteCatalog:
def singer_catalog_to_airbyte_catalog(
singer_catalog: Dict[str, any], sync_mode_overrides: Dict[str, SyncModeInfo], primary_key_overrides: Dict[str, List[str]]
) -> AirbyteCatalog:
"""
:param singer_catalog:
:param sync_mode_overrides: A dict from stream name to the sync modes it should use. Each stream in this dict must exist in the Singer catalog,
but not every stream in the catalog needs to exist in this dict.
:param primary_key_overrides: A dict of stream name -> list of fields to be used as PKs.
:return: Airbyte Catalog
"""
airbyte_streams = []
@@ -138,28 +141,41 @@ class SingerHelper:
airbyte_stream = AirbyteStream(name=name, json_schema=schema)
if name in sync_mode_overrides:
override_sync_modes(airbyte_stream, sync_mode_overrides[name])
else:
set_sync_modes_from_metadata(airbyte_stream, stream.get("metadata", []))
if name in primary_key_overrides:
airbyte_stream.source_defined_primary_key = [[k] for k in primary_key_overrides[name]]
elif stream.get("key_properties"):
airbyte_stream.source_defined_primary_key = [[k] for k in stream["key_properties"]]
airbyte_streams += [airbyte_stream]
return AirbyteCatalog(streams=airbyte_streams)
@staticmethod
def get_catalogs(logger, shell_command: str, sync_mode_overrides: Dict[str, SyncModeInfo], excluded_streams: List) -> Catalogs:
def _read_singer_catalog(logger, shell_command: str) -> Mapping[str, Any]:
    """
    Run a shell command and parse the JSON it prints on stdout.

    :param logger: object exposing ``log_by_prefix(line, level)``; every line the command writes to
        stderr is passed to it together with the level ``"ERROR"``
    :param shell_command: command line, executed through the shell
    :return: the deserialized JSON document read from the command's stdout
    :raises json.JSONDecodeError: if stdout does not contain valid JSON
    """
    # NOTE(review): shell=True runs shell_command verbatim, so it must only be built from trusted input.
    completed_process = subprocess.run(
        shell_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
    )
    # The exit status is not inspected; stderr output is only forwarded to the logger.
    for line in completed_process.stderr.splitlines():
        logger.log_by_prefix(line, "ERROR")
    # Parse stdout exactly once and hand the result straight back to the caller.
    return json.loads(completed_process.stdout)
@staticmethod
def get_catalogs(
    logger,
    shell_command: str,
    sync_mode_overrides: Dict[str, SyncModeInfo],
    primary_key_overrides: Dict[str, List[str]],
    excluded_streams: List,
) -> Catalogs:
    """
    Read the Singer catalog produced by a shell command and build the matching Airbyte catalog.

    :param logger: logger handed through to the catalog-reading step
    :param shell_command: command whose stdout is the Singer catalog JSON
    :param sync_mode_overrides: A dict from stream name to the sync modes it should use
    :param primary_key_overrides: A dict of stream name -> list of fields to be used as PKs
    :param excluded_streams: stream names to drop from the Singer catalog before conversion
    :return: Catalogs holding both the (possibly filtered) Singer catalog and the Airbyte catalog
    """
    singer_catalog = SingerHelper._read_singer_catalog(logger, shell_command)
    streams = singer_catalog.get("streams", [])
    # Only rewrite the "streams" entry when there is something to filter and something to exclude.
    if streams and excluded_streams:
        singer_catalog["streams"] = [stream for stream in streams if stream["stream"] not in excluded_streams]
    # The converter takes three arguments; primary_key_overrides must always be forwarded.
    airbyte_catalog = SingerHelper.singer_catalog_to_airbyte_catalog(singer_catalog, sync_mode_overrides, primary_key_overrides)
    return Catalogs(singer_catalog=singer_catalog, airbyte_catalog=airbyte_catalog)
@staticmethod

View File

@@ -100,7 +100,9 @@ class SingerSource(Source):
def _discover_internal(self, logger: AirbyteLogger, config_path: str) -> Catalogs:
    """
    Build the discovery command for the given config and turn its output into catalogs.

    :param logger: logger passed to both the command builder and the catalog helper
    :param config_path: path handed to ``discover_cmd`` to build the discovery command
    :return: the Catalogs returned by ``SingerHelper.get_catalogs``
    """
    cmd = self.discover_cmd(logger, config_path)
    # get_catalogs expects, in order: sync mode overrides, primary key overrides, excluded streams.
    return SingerHelper.get_catalogs(
        logger, cmd, self.get_sync_mode_overrides(), self.get_primary_key_overrides(), self.get_excluded_streams()
    )
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
@@ -147,6 +149,14 @@ class SingerSource(Source):
"""
return {}
def get_primary_key_overrides(self) -> Dict[str, List[str]]:
    """
    Primary-key counterpart of get_sync_mode_overrides.

    This implementation declares no overrides.
    :return: A mapping of stream name -> names of the fields that form that stream's primary key.
    """
    no_overrides: Dict[str, List[str]] = {}
    return no_overrides
def get_excluded_streams(self) -> List[str]:
"""
This method provides the ability to exclude some streams from the catalog