## What Migrating to Pydantic V2 for Protocol Messages to speed up emitting records. This gives us a 2.5x boost over V1. Closes https://github.com/airbytehq/airbyte-internal-issues/issues/8333 ## How - Switch to using protocol models generated for pydantic_v2, in a new (temporary) package, `airbyte-protocol-models-pdv2`. - Update pydantic dependency of the CDK accordingly to v2. - For minimal impact, still use the compatibility code `pydantic.v1` in all of our pydantic code from airbyte-cdk that does not interact with the protocol models. ## Review guide 1. Check out the code and clear your CDK virtual env (either `rm -rf .venv && python -m venv .venv` or `poetry env list; poetry env remove <env>`). This is necessary to fully clean out the `airbyte_protocol` library, for some reason. Then: `poetry lock --no-update && poetry install --all-extras`. This should install the CDK with new models. 2. Run unit tests on the CDK. 3. Take your favorite connector and point its `pyproject.toml` at the local CDK (see example in `source-s3`) and try running its tests and its regression tests. ## User Impact > [!warning] > This is a major CDK change due to the pydantic dependency change - if connectors use pydantic 1.10, they will break and will need to do similar `from pydantic.v1` updates to get running again. Therefore, we should release this as a major CDK version bump. ## Can this PR be safely reverted and rolled back? - [x] YES 💚 - [ ] NO ❌ Even if sources migrate to this version, state format should not change, so a revert should be possible. 
## Follow-up work - Ella to move into issues <details> ### Source-s3 - turn this into an issue - [ ] Update source-s3 CDK version and any required code changes - [ ] Fix source-s3 unit tests - [ ] Run source-s3 regression tests - [ ] Merge and release source-s3 by June 21st ### Docs - [ ] Update documentation on how to build with the CDK ### CDK pieces - [ ] Update file-based CDK format validation to use Pydantic V2 - This is doable, and requires a breaking change to `OneOfOptionConfig`. There are a few unhandled test cases that present issues we're unsure of how to handle so far. - [ ] Update low-code component generators to use Pydantic V2 - This is doable, but there are a few issues around custom component generation that are unhandled. ### Further CDK performance work - create issues for these - [ ] Research if we can replace prints with buffered output (write to byte buffer and then flush to stdout) - [ ] Replace `json` with `orjson` ... </details>
135 lines
4.1 KiB
Python
135 lines
4.1 KiB
Python
#
|
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
#
|
|
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any, Mapping
|
|
|
|
import pytest
|
|
import yaml
|
|
from airbyte_cdk import Connector
|
|
from airbyte_cdk.models import AirbyteConnectionStatus
|
|
from pydantic import AnyUrl
|
|
|
|
# Logger instance passed to ``Connector.spec`` by the tests in this module.
logger = logging.getLogger("airbyte")

# Resolve this module's own location on disk; the spec fixtures create their
# ``spec.json`` / ``spec.yaml`` files inside SPEC_ROOT.
MODULE = sys.modules[__name__]
MODULE_PATH = os.path.abspath(MODULE.__file__)
SPEC_ROOT = os.path.split(MODULE_PATH)[0]
|
|
|
|
|
|
class MockConnector(Connector):
    """Bare-bones ``Connector`` subclass that the tests in this module instantiate."""

    def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
        """Do nothing; the connection check is stubbed out and yields ``None``."""
        return None
|
|
|
|
|
|
@pytest.fixture()
def mock_config():
    """Provide a small placeholder configuration mapping for the config tests."""
    config = {"bogus": "file"}
    return config
|
|
|
|
|
|
@pytest.fixture
def nonempty_file(mock_config):
    """Yield an open named temporary file containing ``mock_config`` as JSON text.

    The content is flushed before the file object is handed to the test so it
    can be read back through ``file.name``; the file is closed when the
    ``with`` block exits after the test.
    """
    serialized = json.dumps(mock_config)
    with tempfile.NamedTemporaryFile("w") as handle:
        handle.write(serialized)
        handle.flush()
        yield handle
|
|
|
|
|
|
@pytest.fixture
def nonjson_file(mock_config):
    """Yield an open named temporary file whose content is not valid JSON.

    The text is flushed before the file object is handed to the test so it can
    be read back through ``file.name``.
    """
    garbage = "the content of this file is not JSON"
    with tempfile.NamedTemporaryFile("w") as handle:
        handle.write(garbage)
        handle.flush()
        yield handle
|
|
|
|
|
|
@pytest.fixture
def integration():
    """Provide a fresh ``MockConnector`` instance as the connector under test."""
    connector = MockConnector()
    return connector
|
|
|
|
|
|
def test_read_config(nonempty_file, integration: Connector, mock_config):
    """``read_config`` on a JSON file returns the mapping that was written to it."""
    config_path = nonempty_file.name
    loaded = integration.read_config(config_path)
    assert loaded == mock_config
|
|
|
|
|
|
def test_read_non_json_config(nonjson_file, integration: Connector):
    """``read_config`` on a non-JSON file raises ``ValueError`` with the expected message."""
    bad_path = nonjson_file.name
    with pytest.raises(ValueError, match="Could not read json file"):
        integration.read_config(bad_path)
|
|
|
|
|
|
def test_write_config(integration, mock_config):
    """``write_config`` persists the config so that it reads back as equal JSON.

    The config is written inside a private temporary directory rather than at
    a fixed ``config.json`` path in the shared system temp directory, so the
    test cannot read a stale file left by an earlier run, cannot collide with
    a concurrent test run, and leaves nothing behind when it finishes.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        config_path = Path(temp_dir) / "config.json"
        integration.write_config(mock_config, str(config_path))
        with open(config_path, "r") as actual:
            assert json.loads(actual.read()) == mock_config
|
|
|
|
|
|
class TestConnectorSpec:
    """Tests for ``Connector.spec`` against spec files created in ``SPEC_ROOT``."""

    # Connection specification embedded in every valid spec file written below.
    CONNECTION_SPECIFICATION = {
        "type": "object",
        "required": ["api_token"],
        "additionalProperties": False,
        "properties": {"api_token": {"type": "string"}},
    }

    @pytest.fixture
    def use_json_spec(self):
        """Create a valid ``spec.json`` in ``SPEC_ROOT`` and delete it after the test."""
        spec_file = Path(SPEC_ROOT) / "spec.json"
        document = {
            "documentationUrl": "https://airbyte.com/#json",
            "connectionSpecification": self.CONNECTION_SPECIFICATION,
        }
        spec_file.write_text(json.dumps(document))
        yield
        spec_file.unlink()

    @pytest.fixture
    def use_invalid_json_spec(self):
        """Create a ``spec.json`` that is not valid JSON and delete it after the test."""
        spec_file = Path(SPEC_ROOT) / "spec.json"
        spec_file.write_text("the content of this file is not JSON")
        yield
        spec_file.unlink()

    @pytest.fixture
    def use_yaml_spec(self):
        """Create a valid ``spec.yaml`` in ``SPEC_ROOT`` and delete it after the test."""
        spec_file = Path(SPEC_ROOT) / "spec.yaml"
        document = {
            "documentationUrl": "https://airbyte.com/#yaml",
            "connectionSpecification": self.CONNECTION_SPECIFICATION,
        }
        spec_file.write_text(yaml.dump(document))
        yield
        spec_file.unlink()

    def test_spec_from_json_file(self, integration, use_json_spec):
        """A valid ``spec.json`` is loaded with its URL and connection specification."""
        expected_url = AnyUrl("https://airbyte.com/#json")
        loaded = integration.spec(logger)
        assert loaded.documentationUrl == expected_url
        assert loaded.connectionSpecification == self.CONNECTION_SPECIFICATION

    def test_spec_from_improperly_formatted_json_file(self, integration, use_invalid_json_spec):
        """A ``spec.json`` with non-JSON content makes ``spec`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="Could not read json spec file"):
            integration.spec(logger)

    def test_spec_from_yaml_file(self, integration, use_yaml_spec):
        """A valid ``spec.yaml`` is loaded with its URL and connection specification."""
        expected_url = AnyUrl("https://airbyte.com/#yaml")
        loaded = integration.spec(logger)
        assert loaded.documentationUrl == expected_url
        assert loaded.connectionSpecification == self.CONNECTION_SPECIFICATION

    def test_multiple_spec_files_raises_exception(self, integration, use_yaml_spec, use_json_spec):
        """Having both ``spec.yaml`` and ``spec.json`` present makes ``spec`` raise ``RuntimeError``."""
        with pytest.raises(RuntimeError, match="spec.yaml or spec.json"):
            integration.spec(logger)

    def test_no_spec_file_raises_exception(self, integration):
        """With no spec file present, ``spec`` raises ``FileNotFoundError``."""
        with pytest.raises(FileNotFoundError, match="Unable to find spec."):
            integration.spec(logger)
|