When running Singer discovery, use each stream's `key_properties` field to populate `source_defined_primary_key` on the corresponding Airbyte stream (unless a primary-key override is supplied for that stream).
This commit is contained in:
@@ -124,11 +124,14 @@ class SingerHelper:
|
||||
field_object["type"] = SingerHelper._parse_type(field_object["type"])
|
||||
|
||||
@staticmethod
|
||||
def singer_catalog_to_airbyte_catalog(singer_catalog: Dict[str, any], sync_mode_overrides: Dict[str, SyncModeInfo]) -> AirbyteCatalog:
|
||||
def singer_catalog_to_airbyte_catalog(
|
||||
singer_catalog: Dict[str, any], sync_mode_overrides: Dict[str, SyncModeInfo], primary_key_overrides: Dict[str, List[str]]
|
||||
) -> AirbyteCatalog:
|
||||
"""
|
||||
:param singer_catalog:
|
||||
:param sync_mode_overrides: A dict from stream name to the sync modes it should use. Each stream in this dict must exist in the Singer catalog,
|
||||
but not every stream in the catalog should exist in this
|
||||
:param primary_key_overrides: A dict of stream name -> list of fields to be used as PKs.
|
||||
:return: Airbyte Catalog
|
||||
"""
|
||||
airbyte_streams = []
|
||||
@@ -138,28 +141,41 @@ class SingerHelper:
|
||||
airbyte_stream = AirbyteStream(name=name, json_schema=schema)
|
||||
if name in sync_mode_overrides:
|
||||
override_sync_modes(airbyte_stream, sync_mode_overrides[name])
|
||||
|
||||
else:
|
||||
set_sync_modes_from_metadata(airbyte_stream, stream.get("metadata", []))
|
||||
|
||||
if name in primary_key_overrides:
|
||||
airbyte_stream.source_defined_primary_key = [[k] for k in primary_key_overrides[name]]
|
||||
elif stream.get("key_properties"):
|
||||
airbyte_stream.source_defined_primary_key = [[k] for k in stream["key_properties"]]
|
||||
|
||||
airbyte_streams += [airbyte_stream]
|
||||
return AirbyteCatalog(streams=airbyte_streams)
|
||||
|
||||
@staticmethod
def _read_singer_catalog(logger, shell_command: str) -> Mapping[str, Any]:
    """
    Run the Singer discovery command and return its stdout parsed as JSON.

    :param logger: object exposing ``log_by_prefix(line, default)``; every line the command writes to stderr is
                   forwarded to it with "ERROR" as the second argument.
    :param shell_command: the full discovery command as a single string; it is executed through the shell.
    :return: the parsed Singer catalog
    :raises json.JSONDecodeError: if the command's stdout is not valid JSON.
    """
    # NOTE(review): shell=True runs the string through the shell -- presumably the command is assembled by the
    # connector itself; confirm it never embeds untrusted input.
    # The exit code is not checked here; a failing command surfaces through its stderr lines and, if stdout is
    # not JSON, through the json.loads error below.
    completed_process = subprocess.run(
        shell_command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
    )

    for line in completed_process.stderr.splitlines():
        logger.log_by_prefix(line, "ERROR")

    # Parse stdout exactly once and hand the result straight back to the caller.
    return json.loads(completed_process.stdout)
|
||||
|
||||
@staticmethod
def get_catalogs(
    logger,
    shell_command: str,
    sync_mode_overrides: Dict[str, SyncModeInfo],
    primary_key_overrides: Dict[str, List[str]],
    excluded_streams: List,
) -> Catalogs:
    """
    Run Singer discovery and build both the Singer catalog and the equivalent Airbyte catalog.

    :param logger: passed through to SingerHelper._read_singer_catalog.
    :param shell_command: the discovery command, passed through to SingerHelper._read_singer_catalog.
    :param sync_mode_overrides: A dict of stream name -> sync modes, forwarded to the catalog conversion.
    :param primary_key_overrides: A dict of stream name -> list of fields to be used as PKs, forwarded to the
                                  catalog conversion.
    :param excluded_streams: names of streams to drop from the discovered catalog.
    :return: Catalogs holding the (possibly filtered) Singer catalog and the Airbyte catalog derived from it.
    """
    singer_catalog = SingerHelper._read_singer_catalog(logger, shell_command)
    streams = singer_catalog.get("streams", [])
    if streams and excluded_streams:
        # Filter in place on the Singer catalog so that the returned Singer catalog and the Airbyte catalog
        # derived from it agree on which streams exist.
        singer_catalog["streams"] = [stream for stream in streams if stream["stream"] not in excluded_streams]

    # Single conversion call using the three-argument signature (primary key overrides included).
    airbyte_catalog = SingerHelper.singer_catalog_to_airbyte_catalog(singer_catalog, sync_mode_overrides, primary_key_overrides)
    return Catalogs(singer_catalog=singer_catalog, airbyte_catalog=airbyte_catalog)
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -100,7 +100,9 @@ class SingerSource(Source):
|
||||
|
||||
def _discover_internal(self, logger: AirbyteLogger, config_path: str) -> Catalogs:
    """
    Build the discovery command for this source and return the catalogs it produces.

    :param logger: logger handed to discover_cmd and to SingerHelper.get_catalogs.
    :param config_path: path to the config, handed to discover_cmd.
    :return: the Catalogs returned by SingerHelper.get_catalogs.
    """
    cmd = self.discover_cmd(logger, config_path)
    # Argument order must match SingerHelper.get_catalogs:
    # (logger, shell_command, sync_mode_overrides, primary_key_overrides, excluded_streams).
    catalogs = SingerHelper.get_catalogs(
        logger, cmd, self.get_sync_mode_overrides(), self.get_primary_key_overrides(), self.get_excluded_streams()
    )
    return catalogs
|
||||
|
||||
def check(self, logger: AirbyteLogger, config_container: ConfigContainer) -> AirbyteConnectionStatus:
|
||||
@@ -147,6 +149,14 @@ class SingerSource(Source):
|
||||
"""
|
||||
return {}
|
||||
|
||||
def get_primary_key_overrides(self) -> Dict[str, List[str]]:
    """
    Similar to get_sync_mode_overrides but for primary keys.

    The returned dict is passed to SingerHelper.get_catalogs during discovery. For a stream named in it, the
    listed fields are used as the stream's source-defined primary key in place of the ``key_properties``
    reported by the Singer catalog. This default implementation declares no overrides.

    :return: A dict from stream name to the list of primary key fields for the stream.
    """
    return {}
|
||||
|
||||
def get_excluded_streams(self) -> List[str]:
|
||||
"""
|
||||
This method provide ability to exclude some streams from catalog
|
||||
|
||||
Reference in New Issue
Block a user