1
0
mirror of synced 2026-01-16 18:06:29 -05:00
Files
airbyte/airbyte-cdk/python/airbyte_cdk/sources/declarative/schema/json_schema.py
Alexandre Girard 62500af93b get module name from sys.modules (#17779)
* get module name from sys.modules

* bump

* fix comment

* throw exception

* fix unittests

* Add missing files

* remove debug prints

* indent
2022-10-10 13:54:09 -07:00

89 lines
3.7 KiB
Python

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import json
import pkgutil
import sys
from dataclasses import InitVar, dataclass, field
from typing import Any, Mapping, Union
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
from airbyte_cdk.sources.declarative.types import Config
from dataclasses_jsonschema import JsonSchemaMixin
def _default_file_path() -> str:
# schema files are always in "source_<connector_name>/schemas/<stream_name>.json
# the connector's module name can be inferred by looking at the modules loaded and look for the one starting with source_
source_modules = [
k for k, v in sys.modules.items() if "source_" in k # example: ['source_exchange_rates', 'source_exchange_rates.source']
]
if not source_modules:
raise RuntimeError("Expected at least one module starting with 'source_'")
module = source_modules[0].split(".")[0]
return f"./{module}/schemas/{{{{options['name']}}}}.json"
@dataclass
class JsonSchema(SchemaLoader, JsonSchemaMixin):
"""
Loads the schema from a json file
Attributes:
file_path (Union[InterpolatedString, str]): The path to the json file describing the schema
name (str): The stream's name
config (Config): The user-provided configuration as specified by the source's spec
options (Mapping[str, Any]): Additional arguments to pass to the string interpolation if needed
"""
config: Config
options: InitVar[Mapping[str, Any]]
file_path: Union[InterpolatedString, str] = field(default=None)
def __post_init__(self, options: Mapping[str, Any]):
if not self.file_path:
self.file_path = _default_file_path()
self.file_path = InterpolatedString.create(self.file_path, options=options)
def get_json_schema(self) -> Mapping[str, Any]:
# todo: It is worth revisiting if we can replace file_path with just file_name if every schema is in the /schemas directory
# this would require that we find a creative solution to store or retrieve source_name in here since the files are mounted there
json_schema_path = self._get_json_filepath()
resource, schema_path = self.extract_resource_and_schema_path(json_schema_path)
raw_json_file = pkgutil.get_data(resource, schema_path)
if not raw_json_file:
raise IOError(f"Cannot find file {json_schema_path}")
try:
raw_schema = json.loads(raw_json_file)
except ValueError as err:
raise RuntimeError(f"Invalid JSON file format for file {json_schema_path}") from err
return raw_schema
def _get_json_filepath(self):
return self.file_path.eval(self.config)
@staticmethod
def extract_resource_and_schema_path(json_schema_path: str) -> (str, str):
"""
When the connector is running on a docker container, package_data is accessible from the resource (source_<name>), so we extract
the resource from the first part of the schema path and the remaining path is used to find the schema file. This is a slight
hack to identify the source name while we are in the airbyte_cdk module.
:param json_schema_path: The path to the schema JSON file
:return: Tuple of the resource name and the path to the schema file
"""
split_path = json_schema_path.split("/")
if split_path[0] == "" or split_path[0] == ".":
split_path = split_path[1:]
if len(split_path) == 0:
return "", ""
if len(split_path) == 1:
return "", split_path[0]
return split_path[0], "/".join(split_path[1:])