1
0
mirror of synced 2025-12-22 11:31:02 -05:00
Files
airbyte/airbyte-integrations/connectors/source-s3/source_s3/source.py
2022-02-02 00:49:18 +02:00

56 lines
2.3 KiB
Python

#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#
from typing import Any, Mapping, Optional
from pydantic import BaseModel, Field
from .source_files_abstract.source import SourceFilesAbstract
from .source_files_abstract.spec import SourceFilesAbstractSpec
from .stream import IncrementalFileStreamS3
class SourceS3Spec(SourceFilesAbstractSpec, BaseModel):
    # Configuration model for the S3 source: the shared file-source options inherited
    # from SourceFilesAbstractSpec plus an S3-specific `provider` section.
    #
    # NOTE: these models are documented with `#` comments rather than docstrings on
    # purpose -- pydantic can surface a model's docstring as the "description" of the
    # generated JSON schema, which would alter the published connector spec.
    class Config:
        title = "S3 Source Spec"

    class S3Provider(BaseModel):
        # Settings describing where the files live and how to connect to the bucket.
        class Config:
            title = "S3: Amazon Web Services"

        # The only field without a default, so it must always be supplied.
        bucket: str = Field(description="Name of the S3 bucket where the file(s) exist.")

        # Credentials default to None; `airbyte_secret=True` is passed through to the
        # field metadata as an extra keyword.
        aws_access_key_id: Optional[str] = Field(
            default=None,
            description="In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
            airbyte_secret=True,
        )
        aws_secret_access_key: Optional[str] = Field(
            default=None,
            description="In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
            airbyte_secret=True,
        )

        path_prefix: str = Field(
            default="",
            description="By providing a path-like prefix (e.g. myFolder/thisTable/) under which all the relevant files sit, we can optimise finding these in S3. This is optional but recommended if your bucket contains many folders/files.",
        )

        endpoint: str = Field(default="", description="Endpoint to an S3 compatible service. Leave empty to use AWS.")

        # Both flags default to None ("not specified"), so they are annotated Optional[bool]
        # to match the value they can actually hold, like the credential fields above.
        use_ssl: Optional[bool] = Field(default=None, description="Is remote server using secure SSL/TLS connection")
        # NOTE(review): the description reads like the inverse of the field name -- confirm the
        # intended wording with the spec owners before changing the user-facing text.
        verify_ssl_cert: Optional[bool] = Field(default=None, description="Allow self signed certificates")

    # Required: declared without a default value.
    provider: S3Provider
class SourceS3(SourceFilesAbstract):
    """File-based source bound to the S3 stream implementation and the S3 spec model."""

    stream_class = IncrementalFileStreamS3
    spec_class = SourceS3Spec
    documentation_url = "https://docs.airbyte.io/integrations/sources/s3"

    def read_config(self, config_path: str) -> Mapping[str, Any]:
        """Load the connector config and normalise an escaped tab delimiter.

        The config is read by the parent class. If ``config["format"]["delimiter"]``
        is the two-character text backslash + ``t``, it is replaced in place with a
        real tab character; every other value is left untouched.

        :param config_path: location of the config, forwarded unchanged to the parent
            ``read_config``.
        :return: the mapping produced by the parent class, possibly with the delimiter
            rewritten.
        """
        config: Mapping[str, Any] = super().read_config(config_path)

        # Only inspect "format" when it really is a mapping: a null or otherwise
        # malformed value would make a chained `.get()` raise AttributeError here.
        format_options = config.get("format")
        if isinstance(format_options, Mapping) and format_options.get("delimiter") == r"\t":
            # Same object as config["format"], so this updates the returned config.
            format_options["delimiter"] = "\t"

        return config