chore: bump schema-generator version to 0.1.1 and update poetry.lock (#54878)
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: natik@airbyte.io <natik@airbyte.io>
This commit is contained in:
committed by
GitHub
parent
3f0c3ab1bd
commit
7b485beb44
749
tools/schema_generator/poetry.lock
generated
749
tools/schema_generator/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "schema-generator"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
description = "Util to create catalog schemas for an Airbyte Connector."
|
||||
authors = ["Airbyte <contact@airbyte.io>"]
|
||||
license = "MIT"
|
||||
@@ -19,8 +19,8 @@ classifiers = [
|
||||
keywords = ["airbyte", "connector-development-kit", "cdk"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.9, <3.12"
|
||||
airbyte-cdk = " < 3"
|
||||
python = ">=3.10, <3.12"
|
||||
airbyte-cdk = ">=6.0.0, <7.0.0"
|
||||
genson = "< 2"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
@@ -5,16 +5,20 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
|
||||
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
|
||||
|
||||
|
||||
def configure_catalog():
|
||||
record = AirbyteMessage.parse_raw(input())
|
||||
for stream in record.catalog.streams:
|
||||
stream.json_schema = {}
|
||||
record_dict = json.loads(input())
|
||||
catalog_streams = record_dict.get("catalog", {}).get("streams", [])
|
||||
for stream in catalog_streams:
|
||||
stream["json_schema"] = {}
|
||||
streams = [
|
||||
ConfiguredAirbyteStream(stream=stream, sync_mode=stream.supported_sync_modes[0], destination_sync_mode=DestinationSyncMode.append)
|
||||
for stream in record.catalog.streams
|
||||
ConfiguredAirbyteStream(
|
||||
stream=stream.get("name"), sync_mode=stream.get("supported_sync_modes", [])[0], destination_sync_mode=DestinationSyncMode.append
|
||||
)
|
||||
for stream in catalog_streams
|
||||
if stream.get("supported_sync_modes")
|
||||
]
|
||||
configured_catalog = ConfiguredAirbyteCatalog(streams=streams)
|
||||
|
||||
@@ -23,4 +27,17 @@ def configure_catalog():
|
||||
os.mkdir(default_folder)
|
||||
output_file_name = os.path.join(default_folder, "configured_catalog.json")
|
||||
with open(output_file_name, "w") as outfile:
|
||||
json.dump(json.loads(configured_catalog.json()), outfile, indent=2, sort_keys=True)
|
||||
# Create a dictionary representation of the configured catalog
|
||||
result = {"streams": []}
|
||||
for stream in configured_catalog.streams:
|
||||
stream_dict = {
|
||||
"stream": {
|
||||
"name": stream.stream.name if hasattr(stream.stream, "name") else stream.stream,
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"json_schema": {},
|
||||
},
|
||||
"sync_mode": str(stream.sync_mode),
|
||||
"destination_sync_mode": "append",
|
||||
}
|
||||
result["streams"].append(stream_dict)
|
||||
json.dump(result, outfile, indent=2, sort_keys=True)
|
||||
|
||||
@@ -30,7 +30,8 @@ import genson.schema.strategies as strategies
|
||||
from genson import SchemaBuilder
|
||||
from genson.schema.strategies.object import Object
|
||||
|
||||
from airbyte_cdk.models import AirbyteMessage, Type
|
||||
|
||||
# Import removed as we're using JSON dictionaries directly
|
||||
|
||||
|
||||
class NoRequiredObj(Object):
|
||||
@@ -65,15 +66,15 @@ def infer_schemas():
|
||||
|
||||
builders = {}
|
||||
for line in sys.stdin:
|
||||
message = AirbyteMessage.parse_raw(line)
|
||||
if message.type == Type.RECORD:
|
||||
stream_name = message.record.stream
|
||||
message_dict = json.loads(line)
|
||||
if message_dict.get("type") == "RECORD":
|
||||
stream_name = message_dict.get("record", {}).get("stream")
|
||||
if stream_name not in builders:
|
||||
builder = NoRequiredSchemaBuilder()
|
||||
builders[stream_name] = builder
|
||||
else:
|
||||
builder = builders[stream_name]
|
||||
builder.add_object(message.record.data)
|
||||
builder.add_object(message_dict.get("record", {}).get("data", {}))
|
||||
for stream_name, builder in builders.items():
|
||||
schema = builder.to_schema()
|
||||
output_file_name = os.path.join(default_folder, stream_name + ".json")
|
||||
|
||||
@@ -27,14 +27,22 @@ from schema_generator.infer_schemas import infer_schemas
|
||||
def test_configure_catalog():
|
||||
stream = AirbyteStream(name="stream", supported_sync_modes=[SyncMode.full_refresh], json_schema={})
|
||||
catalog = AirbyteCatalog(streams=[stream])
|
||||
catalog_message = AirbyteMessage(type=Type.CATALOG, catalog=catalog)
|
||||
sys.stdin = io.StringIO(catalog_message.json())
|
||||
catalog_dict = {"type": "CATALOG", "catalog": {"streams": [{"name": "stream", "supported_sync_modes": ["full_refresh"], "json_schema": {}}]}}
|
||||
sys.stdin = io.StringIO(json.dumps(catalog_dict))
|
||||
|
||||
expected_configured_catalog = ConfiguredAirbyteCatalog(
|
||||
streams=[ConfiguredAirbyteStream(stream=stream, sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append)]
|
||||
)
|
||||
|
||||
expected_configured_catalog_json = json.loads(expected_configured_catalog.json())
|
||||
expected_configured_catalog_json = {
|
||||
"streams": [
|
||||
{
|
||||
"stream": {
|
||||
"name": "stream",
|
||||
"supported_sync_modes": ["full_refresh"],
|
||||
"json_schema": {}
|
||||
},
|
||||
"sync_mode": "full_refresh",
|
||||
"destination_sync_mode": "append"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
os.chdir(temp_dir)
|
||||
@@ -48,15 +56,16 @@ def test_configure_catalog():
|
||||
|
||||
def test_infer_schemas():
|
||||
expected_schema = {
|
||||
"$schema": "http://json-schema.org/schema#",
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"properties": {"a": {"type": "integer"}, "b": {"type": "string"}},
|
||||
"type": "object",
|
||||
"additionalProperties": True,
|
||||
}
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
os.chdir(temp_dir)
|
||||
record = {"a": 1, "b": "test"}
|
||||
record_message = AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data=record, emitted_at=111)).json()
|
||||
record_message = json.dumps({"type": "RECORD", "record": {"stream": "stream", "data": record, "emitted_at": 111}})
|
||||
sys.stdin = io.StringIO(record_message)
|
||||
infer_schemas()
|
||||
assert os.path.exists("schemas/stream.json")
|
||||
|
||||
Reference in New Issue
Block a user