{
"documentationUrl": "https://docs.airbyte.com/integrations/sources/file",
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "File Source Spec",
"type": "object",
"additionalProperties": true,
"required": ["dataset_name", "format", "url", "provider"],
"properties": {
"dataset_name": {
"type": "string",
"title": "Dataset Name",
"description": "The name of the final table to replicate this file into (should include letters, numbers, dashes and underscores only)."
},
"format": {
"type": "string",
"enum": [
"csv",
"json",
"jsonl",
"excel",
"excel_binary",
"fwf",
"feather",
"parquet",
"yaml"
],
"default": "csv",
"title": "File Format",
"description": "The format of the file which should be replicated (Warning: some formats may be experimental; please refer to the docs)."
},
"reader_options": {
"type": "string",
"title": "Reader Options",
"description": "This should be a string in JSON format. It provides additional options, which depend on the chosen file format, to tune the reader's behavior.",
"examples": [
"{}",
"{\"sep\": \" \"}",
"{\"sep\": \"\\t\", \"header\": 0, \"names\": [\"column1\", \"column2\"] }"
]
},
"url": {
"type": "string",
"title": "URL",
"description": "The URL path to access the file which should be replicated.",
"examples": [
"https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv",
"gs://my-google-bucket/data.csv",
"s3://gdelt-open-data/events/20190914.export.csv"
]
},
"provider": {
"type": "object",
"title": "Storage Provider",
"description": "The storage provider or location of the file(s) which should be replicated.",
"default": "Public Web",
"oneOf": [
{
"title": "HTTPS: Public Web",
"required": ["storage"],
"properties": {
"storage": {
"type": "string",
"const": "HTTPS"
},
"user_agent": {
"type": "boolean",
"title": "User-Agent",
"default": false,
"description": "Add a User-Agent header to the request."
}
}
},
{
"title": "GCS: Google Cloud Storage",
"required": ["storage"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "GCS"
},
"service_account_json": {
"type": "string",
"title": "Service Account JSON",
"airbyte_secret": true,
"description": "In order to access private buckets stored on Google Cloud, this connector needs service account JSON credentials with the proper permissions, as described in the Google Cloud service account documentation. Please generate the credentials.json file and copy/paste its content into this field (expecting JSON format). If accessing publicly available data, this field is not necessary."
}
}
},
{
"title": "S3: Amazon Web Services",
"required": ["storage"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "S3"
},
"aws_access_key_id": {
"type": "string",
"title": "AWS Access Key ID",
"description": "In order to access private buckets stored on AWS S3, this connector needs credentials with the proper permissions. If accessing publicly available data, this field is not necessary."
},
"aws_secret_access_key": {
"type": "string",
"title": "AWS Secret Access Key",
"description": "In order to access private buckets stored on AWS S3, this connector needs credentials with the proper permissions. If accessing publicly available data, this field is not necessary.",
"airbyte_secret": true
}
}
},
{
"title": "AzBlob: Azure Blob Storage",
"required": ["storage", "storage_account"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "AzBlob"
},
"storage_account": {
"type": "string",
"title": "Storage Account",
"description": "The globally unique name of the storage account that the desired blob sits within. See the Azure Blob Storage documentation for more details."
},
"sas_token": {
"type": "string",
"title": "SAS Token",
"description": "To access Azure Blob Storage, this connector needs credentials with the proper permissions. One option is a SAS (Shared Access Signature) token. If accessing publicly available data, this field is not necessary.",
"airbyte_secret": true
},
"shared_key": {
"type": "string",
"title": "Shared Key",
"description": "To access Azure Blob Storage, this connector needs credentials with the proper permissions. One option is a storage account shared key (aka account key or access key). If accessing publicly available data, this field is not necessary.",
"airbyte_secret": true
}
}
},
{
"title": "SSH: Secure Shell",
"required": ["storage", "user", "host"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "SSH"
},
"user": {
"type": "string",
"title": "User",
"description": "User name used to connect to the host over SSH."
},
"password": {
"type": "string",
"title": "Password",
"description": "Password for the given user.",
"airbyte_secret": true
},
"host": {
"type": "string",
"title": "Host",
"description": "Hostname or IP address of the server to connect to over SSH."
},
"port": {
"type": "string",
"title": "Port",
"default": "22",
"description": "Port to connect to on the host (default: 22)."
}
}
},
{
"title": "SCP: Secure copy protocol",
"required": ["storage", "user", "host"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "SCP"
},
"user": {
"type": "string",
"title": "User",
"description": "User name used to connect to the host over SCP."
},
"password": {
"type": "string",
"title": "Password",
"description": "Password for the given user.",
"airbyte_secret": true
},
"host": {
"type": "string",
"title": "Host",
"description": "Hostname or IP address of the server to connect to over SCP."
},
"port": {
"type": "string",
"title": "Port",
"default": "22",
"description": "Port to connect to on the host (default: 22)."
}
}
},
{
"title": "SFTP: Secure File Transfer Protocol",
"required": ["storage", "user", "host"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"const": "SFTP"
},
"user": {
"type": "string",
"title": "User",
"description": "User name used to connect to the host over SFTP."
},
"password": {
"type": "string",
"title": "Password",
"description": "Password for the given user.",
"airbyte_secret": true
},
"host": {
"type": "string",
"title": "Host",
"description": "Hostname or IP address of the server to connect to over SFTP."
},
"port": {
"type": "string",
"title": "Port",
"default": "22",
"description": "Port to connect to on the host (default: 22)."
}
}
},
{
"title": "Local Filesystem (limited)",
"required": ["storage"],
"properties": {
"storage": {
"type": "string",
"title": "Storage",
"description": "WARNING: The local storage URL available for reading must currently start with the local mount \"/local/\", until more advanced Docker mounting options are implemented.",
"const": "local"
}
}
}
]
}
}
}
}