chore: bump schema-generator version to 0.1.1 and update poetry.lock (#54878)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: natik@airbyte.io <natik@airbyte.io>
This commit is contained in:
committed by
GitHub
parent
3f0c3ab1bd
commit
7b485beb44
749
tools/schema_generator/poetry.lock
generated
749
tools/schema_generator/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "schema-generator"
|
name = "schema-generator"
|
||||||
version = "0.1.0"
|
version = "0.1.1"
|
||||||
description = "Util to create catalog schemas for an Airbyte Connector."
|
description = "Util to create catalog schemas for an Airbyte Connector."
|
||||||
authors = ["Airbyte <contact@airbyte.io>"]
|
authors = ["Airbyte <contact@airbyte.io>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@@ -19,8 +19,8 @@ classifiers = [
|
|||||||
keywords = ["airbyte", "connector-development-kit", "cdk"]
|
keywords = ["airbyte", "connector-development-kit", "cdk"]
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.9, <3.12"
|
python = ">=3.10, <3.12"
|
||||||
airbyte-cdk = " < 3"
|
airbyte-cdk = ">=6.0.0, <7.0.0"
|
||||||
genson = "< 2"
|
genson = "< 2"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
|||||||
@@ -5,16 +5,20 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
|
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
|
||||||
|
|
||||||
|
|
||||||
def configure_catalog():
|
def configure_catalog():
|
||||||
record = AirbyteMessage.parse_raw(input())
|
record_dict = json.loads(input())
|
||||||
for stream in record.catalog.streams:
|
catalog_streams = record_dict.get("catalog", {}).get("streams", [])
|
||||||
stream.json_schema = {}
|
for stream in catalog_streams:
|
||||||
|
stream["json_schema"] = {}
|
||||||
streams = [
|
streams = [
|
||||||
ConfiguredAirbyteStream(stream=stream, sync_mode=stream.supported_sync_modes[0], destination_sync_mode=DestinationSyncMode.append)
|
ConfiguredAirbyteStream(
|
||||||
for stream in record.catalog.streams
|
stream=stream.get("name"), sync_mode=stream.get("supported_sync_modes", [])[0], destination_sync_mode=DestinationSyncMode.append
|
||||||
|
)
|
||||||
|
for stream in catalog_streams
|
||||||
|
if stream.get("supported_sync_modes")
|
||||||
]
|
]
|
||||||
configured_catalog = ConfiguredAirbyteCatalog(streams=streams)
|
configured_catalog = ConfiguredAirbyteCatalog(streams=streams)
|
||||||
|
|
||||||
@@ -23,4 +27,17 @@ def configure_catalog():
|
|||||||
os.mkdir(default_folder)
|
os.mkdir(default_folder)
|
||||||
output_file_name = os.path.join(default_folder, "configured_catalog.json")
|
output_file_name = os.path.join(default_folder, "configured_catalog.json")
|
||||||
with open(output_file_name, "w") as outfile:
|
with open(output_file_name, "w") as outfile:
|
||||||
json.dump(json.loads(configured_catalog.json()), outfile, indent=2, sort_keys=True)
|
# Create a dictionary representation of the configured catalog
|
||||||
|
result = {"streams": []}
|
||||||
|
for stream in configured_catalog.streams:
|
||||||
|
stream_dict = {
|
||||||
|
"stream": {
|
||||||
|
"name": stream.stream.name if hasattr(stream.stream, "name") else stream.stream,
|
||||||
|
"supported_sync_modes": ["full_refresh"],
|
||||||
|
"json_schema": {},
|
||||||
|
},
|
||||||
|
"sync_mode": str(stream.sync_mode),
|
||||||
|
"destination_sync_mode": "append",
|
||||||
|
}
|
||||||
|
result["streams"].append(stream_dict)
|
||||||
|
json.dump(result, outfile, indent=2, sort_keys=True)
|
||||||
|
|||||||
@@ -30,7 +30,8 @@ import genson.schema.strategies as strategies
|
|||||||
from genson import SchemaBuilder
|
from genson import SchemaBuilder
|
||||||
from genson.schema.strategies.object import Object
|
from genson.schema.strategies.object import Object
|
||||||
|
|
||||||
from airbyte_cdk.models import AirbyteMessage, Type
|
|
||||||
|
# Import removed as we're using JSON dictionaries directly
|
||||||
|
|
||||||
|
|
||||||
class NoRequiredObj(Object):
|
class NoRequiredObj(Object):
|
||||||
@@ -65,15 +66,15 @@ def infer_schemas():
|
|||||||
|
|
||||||
builders = {}
|
builders = {}
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
message = AirbyteMessage.parse_raw(line)
|
message_dict = json.loads(line)
|
||||||
if message.type == Type.RECORD:
|
if message_dict.get("type") == "RECORD":
|
||||||
stream_name = message.record.stream
|
stream_name = message_dict.get("record", {}).get("stream")
|
||||||
if stream_name not in builders:
|
if stream_name not in builders:
|
||||||
builder = NoRequiredSchemaBuilder()
|
builder = NoRequiredSchemaBuilder()
|
||||||
builders[stream_name] = builder
|
builders[stream_name] = builder
|
||||||
else:
|
else:
|
||||||
builder = builders[stream_name]
|
builder = builders[stream_name]
|
||||||
builder.add_object(message.record.data)
|
builder.add_object(message_dict.get("record", {}).get("data", {}))
|
||||||
for stream_name, builder in builders.items():
|
for stream_name, builder in builders.items():
|
||||||
schema = builder.to_schema()
|
schema = builder.to_schema()
|
||||||
output_file_name = os.path.join(default_folder, stream_name + ".json")
|
output_file_name = os.path.join(default_folder, stream_name + ".json")
|
||||||
|
|||||||
@@ -27,14 +27,22 @@ from schema_generator.infer_schemas import infer_schemas
|
|||||||
def test_configure_catalog():
|
def test_configure_catalog():
|
||||||
stream = AirbyteStream(name="stream", supported_sync_modes=[SyncMode.full_refresh], json_schema={})
|
stream = AirbyteStream(name="stream", supported_sync_modes=[SyncMode.full_refresh], json_schema={})
|
||||||
catalog = AirbyteCatalog(streams=[stream])
|
catalog = AirbyteCatalog(streams=[stream])
|
||||||
catalog_message = AirbyteMessage(type=Type.CATALOG, catalog=catalog)
|
catalog_dict = {"type": "CATALOG", "catalog": {"streams": [{"name": "stream", "supported_sync_modes": ["full_refresh"], "json_schema": {}}]}}
|
||||||
sys.stdin = io.StringIO(catalog_message.json())
|
sys.stdin = io.StringIO(json.dumps(catalog_dict))
|
||||||
|
|
||||||
expected_configured_catalog = ConfiguredAirbyteCatalog(
|
expected_configured_catalog_json = {
|
||||||
streams=[ConfiguredAirbyteStream(stream=stream, sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append)]
|
"streams": [
|
||||||
)
|
{
|
||||||
|
"stream": {
|
||||||
expected_configured_catalog_json = json.loads(expected_configured_catalog.json())
|
"name": "stream",
|
||||||
|
"supported_sync_modes": ["full_refresh"],
|
||||||
|
"json_schema": {}
|
||||||
|
},
|
||||||
|
"sync_mode": "full_refresh",
|
||||||
|
"destination_sync_mode": "append"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
os.chdir(temp_dir)
|
os.chdir(temp_dir)
|
||||||
@@ -48,15 +56,16 @@ def test_configure_catalog():
|
|||||||
|
|
||||||
def test_infer_schemas():
|
def test_infer_schemas():
|
||||||
expected_schema = {
|
expected_schema = {
|
||||||
"$schema": "http://json-schema.org/schema#",
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
"properties": {"a": {"type": "integer"}, "b": {"type": "string"}},
|
"properties": {"a": {"type": "integer"}, "b": {"type": "string"}},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
"additionalProperties": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as temp_dir:
|
with tempfile.TemporaryDirectory() as temp_dir:
|
||||||
os.chdir(temp_dir)
|
os.chdir(temp_dir)
|
||||||
record = {"a": 1, "b": "test"}
|
record = {"a": 1, "b": "test"}
|
||||||
record_message = AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data=record, emitted_at=111)).json()
|
record_message = json.dumps({"type": "RECORD", "record": {"stream": "stream", "data": record, "emitted_at": 111}})
|
||||||
sys.stdin = io.StringIO(record_message)
|
sys.stdin = io.StringIO(record_message)
|
||||||
infer_schemas()
|
infer_schemas()
|
||||||
assert os.path.exists("schemas/stream.json")
|
assert os.path.exists("schemas/stream.json")
|
||||||
|
|||||||
Reference in New Issue
Block a user