1
0
mirror of synced 2025-12-19 18:14:56 -05:00

chore: bump schema-generator version to 0.1.1 and update poetry.lock (#54878)

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: natik@airbyte.io <natik@airbyte.io>
This commit is contained in:
devin-ai-integration[bot]
2025-03-01 18:29:31 +00:00
committed by GitHub
parent 3f0c3ab1bd
commit 7b485beb44
5 changed files with 730 additions and 94 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "schema-generator"
version = "0.1.0"
version = "0.1.1"
description = "Util to create catalog schemas for an Airbyte Connector."
authors = ["Airbyte <contact@airbyte.io>"]
license = "MIT"
@@ -19,8 +19,8 @@ classifiers = [
keywords = ["airbyte", "connector-development-kit", "cdk"]
[tool.poetry.dependencies]
python = ">=3.9, <3.12"
airbyte-cdk = " < 3"
python = ">=3.10, <3.12"
airbyte-cdk = ">=6.0.0, <7.0.0"
genson = "< 2"
[tool.poetry.group.dev.dependencies]

View File

@@ -5,16 +5,20 @@
import json
import os
from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream, DestinationSyncMode
def configure_catalog():
record = AirbyteMessage.parse_raw(input())
for stream in record.catalog.streams:
stream.json_schema = {}
record_dict = json.loads(input())
catalog_streams = record_dict.get("catalog", {}).get("streams", [])
for stream in catalog_streams:
stream["json_schema"] = {}
streams = [
ConfiguredAirbyteStream(stream=stream, sync_mode=stream.supported_sync_modes[0], destination_sync_mode=DestinationSyncMode.append)
for stream in record.catalog.streams
ConfiguredAirbyteStream(
stream=stream.get("name"), sync_mode=stream.get("supported_sync_modes", [])[0], destination_sync_mode=DestinationSyncMode.append
)
for stream in catalog_streams
if stream.get("supported_sync_modes")
]
configured_catalog = ConfiguredAirbyteCatalog(streams=streams)
@@ -23,4 +27,17 @@ def configure_catalog():
os.mkdir(default_folder)
output_file_name = os.path.join(default_folder, "configured_catalog.json")
with open(output_file_name, "w") as outfile:
json.dump(json.loads(configured_catalog.json()), outfile, indent=2, sort_keys=True)
# Create a dictionary representation of the configured catalog
result = {"streams": []}
for stream in configured_catalog.streams:
stream_dict = {
"stream": {
"name": stream.stream.name if hasattr(stream.stream, "name") else stream.stream,
"supported_sync_modes": ["full_refresh"],
"json_schema": {},
},
"sync_mode": str(stream.sync_mode),
"destination_sync_mode": "append",
}
result["streams"].append(stream_dict)
json.dump(result, outfile, indent=2, sort_keys=True)

View File

@@ -30,7 +30,8 @@ import genson.schema.strategies as strategies
from genson import SchemaBuilder
from genson.schema.strategies.object import Object
from airbyte_cdk.models import AirbyteMessage, Type
# Import removed as we're using JSON dictionaries directly
class NoRequiredObj(Object):
@@ -65,15 +66,15 @@ def infer_schemas():
builders = {}
for line in sys.stdin:
message = AirbyteMessage.parse_raw(line)
if message.type == Type.RECORD:
stream_name = message.record.stream
message_dict = json.loads(line)
if message_dict.get("type") == "RECORD":
stream_name = message_dict.get("record", {}).get("stream")
if stream_name not in builders:
builder = NoRequiredSchemaBuilder()
builders[stream_name] = builder
else:
builder = builders[stream_name]
builder.add_object(message.record.data)
builder.add_object(message_dict.get("record", {}).get("data", {}))
for stream_name, builder in builders.items():
schema = builder.to_schema()
output_file_name = os.path.join(default_folder, stream_name + ".json")

View File

@@ -27,14 +27,22 @@ from schema_generator.infer_schemas import infer_schemas
def test_configure_catalog():
stream = AirbyteStream(name="stream", supported_sync_modes=[SyncMode.full_refresh], json_schema={})
catalog = AirbyteCatalog(streams=[stream])
catalog_message = AirbyteMessage(type=Type.CATALOG, catalog=catalog)
sys.stdin = io.StringIO(catalog_message.json())
catalog_dict = {"type": "CATALOG", "catalog": {"streams": [{"name": "stream", "supported_sync_modes": ["full_refresh"], "json_schema": {}}]}}
sys.stdin = io.StringIO(json.dumps(catalog_dict))
expected_configured_catalog = ConfiguredAirbyteCatalog(
streams=[ConfiguredAirbyteStream(stream=stream, sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append)]
)
expected_configured_catalog_json = json.loads(expected_configured_catalog.json())
expected_configured_catalog_json = {
"streams": [
{
"stream": {
"name": "stream",
"supported_sync_modes": ["full_refresh"],
"json_schema": {}
},
"sync_mode": "full_refresh",
"destination_sync_mode": "append"
}
]
}
with tempfile.TemporaryDirectory() as temp_dir:
os.chdir(temp_dir)
@@ -48,15 +56,16 @@ def test_configure_catalog():
def test_infer_schemas():
expected_schema = {
"$schema": "http://json-schema.org/schema#",
"$schema": "http://json-schema.org/draft-07/schema#",
"properties": {"a": {"type": "integer"}, "b": {"type": "string"}},
"type": "object",
"additionalProperties": True,
}
with tempfile.TemporaryDirectory() as temp_dir:
os.chdir(temp_dir)
record = {"a": 1, "b": "test"}
record_message = AirbyteMessage(type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data=record, emitted_at=111)).json()
record_message = json.dumps({"type": "RECORD", "record": {"stream": "stream", "data": record, "emitted_at": 111}})
sys.stdin = io.StringIO(record_message)
infer_schemas()
assert os.path.exists("schemas/stream.json")