1
0
mirror of synced 2025-12-19 18:14:56 -05:00
Files
airbyte/airbyte-integrations/connectors/source-s3/source_s3/v4/source.py
Catherine Noll ee7da12dac Source S3: revert rollback to 4.4.1 (#35055)
Co-authored-by: Augustin <augustin@airbyte.io>
2024-02-16 14:52:23 -05:00

116 lines
4.6 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from typing import Any, Dict, Mapping, Optional
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
from airbyte_cdk.models import ConnectorSpecification
from airbyte_cdk.sources.file_based.file_based_source import DEFAULT_CONCURRENCY, FileBasedSource
from airbyte_cdk.utils import is_cloud_environment
from source_s3.source import SourceS3Spec
from source_s3.v4.legacy_config_transformer import LegacyConfigTransformer
# For each deprecated top-level V3 config field, the V4 field(s) that the
# generated deprecation notice tells users to use instead.
_V3_DEPRECATION_FIELD_MAPPING: Mapping[str, str] = {
    "dataset": "streams.name",
    "format": "streams.format",
    "path_pattern": "streams.globs",
    "provider": "bucket, aws_access_key_id, aws_secret_access_key and endpoint",
    "schema": "streams.input_schema",
}
class SourceS3(FileBasedSource):
    """File-based S3 source that still accepts configs written in the legacy (V3) format."""

    @classmethod
    def read_config(cls, config_path: str) -> Mapping[str, Any]:
        """
        Read the config at `config_path`, converting it to the V4 format when it is a legacy config.

        Used to override the default read_config so that when the new file-based S3 connector processes a config
        in the legacy format, it can be transformed into the new config. This happens in entrypoint before we
        validate the config against the new spec.

        :param config_path: path of the config file, forwarded to the parent `read_config`.
        :return: the config unchanged when it is already V4, otherwise the converted config
                 (which is also emitted as an Airbyte control message).
        """
        config = super().read_config(config_path)
        if SourceS3._is_v4_config(config):
            return config

        # Legacy path: parse with the V3 spec model, convert, and emit the converted
        # config as a control message before returning it.
        parsed_legacy_config = SourceS3Spec(**config)
        converted_config = LegacyConfigTransformer.convert(parsed_legacy_config)
        emit_configuration_as_airbyte_control_message(converted_config)
        return converted_config

    def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
        """
        Build the connector specification: the V4 schema plus every V3 property, flagged as deprecated.

        Each V3 top-level property is copied into the V4 schema with `airbyte_hidden` set to True, its
        `order` shifted by 100, a deprecation notice prepended to its description and its nested
        `required` lists emptied.

        :raises ValueError: if a top-level property name exists in both the V3 and the V4 schema.
        :raises KeyError: if a V3 property has no `order` or `description` entry.
        """
        v3_schema = SourceS3Spec.schema()
        v4_schema = self.spec_class.schema()
        v4_properties = v4_schema["properties"]

        if v3_schema["properties"].keys() & v4_properties.keys():
            raise ValueError("Overlapping properties between V3 and V4")

        for v3_property_key, v3_property in v3_schema["properties"].items():
            # The same dict object is stored in the V4 schema and then edited in place.
            v4_properties[v3_property_key] = v3_property
            v3_property["airbyte_hidden"] = True
            v3_property["order"] += 100
            deprecation_prefix = SourceS3._create_description_with_deprecation_prefix(
                _V3_DEPRECATION_FIELD_MAPPING.get(v3_property_key, None)
            )
            v3_property["description"] = deprecation_prefix + v3_property["description"]
            self._clean_required_fields(v3_property)

        if is_cloud_environment():
            # In cloud environments the endpoint pattern rejects values starting with "http://".
            v4_properties["endpoint"].update(
                {
                    "description": "Endpoint to an S3 compatible service. Leave empty to use AWS. "
                    "The custom endpoint must be secure, but the 'https' prefix is not required.",
                    "pattern": "^(?!http://).*$",  # ignore-https-check
                }
            )

        return ConnectorSpecification(
            documentationUrl=self.spec_class.documentation_url(),
            connectionSpecification=v4_schema,
        )

    @staticmethod
    def _is_v4_config(config: Mapping[str, Any]) -> bool:
        """Return True when `config` has a top-level `streams` key, which marks it as a V4 config."""
        return "streams" in config

    @staticmethod
    def _clean_required_fields(v3_field: Dict[str, Any]) -> None:
        """
        Recursively empty the `required` list of `v3_field` and of every nested object schema, in place.

        Not having V3 fields root level as part of the `required` field is not enough as the platform will create empty objects for those.
        For example, filling all non-hidden fields from the form will create a config like:
        ```
        {
          <...>
          "provider": {},
          <...>
        }
        ```

        As the field `provider` exists, the JSON validation will be applied and as `provider.bucket` is needed, the validation will fail
        with the following error:
        ```
          "errors": {
            "connectionConfiguration": {
              "provider": {
                "bucket": {
                  "message": "form.empty.error",
                  "type": "required"
                }
              }
            }
          }
        ```

        Hence, we need to make any V3 nested fields not required.

        :param v3_field: JSON-schema fragment of a V3 field; left untouched when it has no `properties` key.
        """
        if "properties" not in v3_field:
            return

        v3_field["required"] = []
        # Recurse into the sub-schemas (the dict values). Iterating the mapping itself
        # would yield only the property names, so nested objects would never be cleaned.
        for nested_field in v3_field["properties"].values():
            if isinstance(nested_field, dict):
                SourceS3._clean_required_fields(nested_field)

    @staticmethod
    def _create_description_with_deprecation_prefix(new_fields: Optional[str]) -> str:
        """
        Build the deprecation notice that is prepended to a V3 field description.

        :param new_fields: human-readable name(s) of the replacement field(s); when empty or None
                           the notice does not mention a replacement.
        :return: the notice, ending with a trailing space so a description can be appended directly.
        """
        if not new_fields:
            return "Deprecated and will be removed soon. Please do not use this field anymore. "
        return f"Deprecated and will be removed soon. Please do not use this field anymore and use {new_fields} instead. "