From 4e9f4bdd0658ba3aa5cc51c859e53f971553744f Mon Sep 17 00:00:00 2001 From: "Sherif A. Nada" Date: Tue, 27 Apr 2021 00:28:10 -0700 Subject: [PATCH] CDK: tutorial for implementing an HTTP source (#3079) --- .../bases/base-python/CDK-README.md | 10 +- .../docs/tutorials/http_api_source.md | 526 ++++++++++++++++++ .../configured_catalog.json | 131 +++++ .../exchange_rates.json | 116 ++++ .../generator/build.gradle | 2 +- .../connector-templates/generator/plopfile.js | 4 +- .../source-python-http-api/setup.py.hbs | 2 +- .../source_{{snakeCase name}}/source.py.hbs | 4 +- .../source_{{snakeCase name}}/spec.json.hbs | 6 +- .../source_exchange_rates/spec.json | 4 +- .../source-python-http-tutorial/.dockerignore | 6 + .../source-python-http-tutorial/Dockerfile | 16 + .../source-python-http-tutorial/README.md | 100 ++++ .../source-python-http-tutorial/build.gradle | 32 ++ .../source-python-http-tutorial/main_dev.py | 55 ++ .../requirements.txt | 4 + .../sample_files/config.json | 1 + .../sample_files/configured_catalog.json | 131 +++++ .../sample_files/invalid_config.json | 1 + .../sample_files/state.json | 5 + .../source-python-http-tutorial/setup.py | 58 ++ .../source_python_http_tutorial/__init__.py | 27 + .../schemas/TODO.md | 25 + .../schemas/customers.json | 16 + .../schemas/employees.json | 19 + .../schemas/exchange_rates.json | 119 ++++ .../source_python_http_tutorial/source.py | 122 ++++ .../source_python_http_tutorial/spec.json | 22 + .../unit_tests/unit_tests.py | 52 ++ .../source-scaffold-source-http/setup.py | 2 +- .../source_scaffold_source_http/source.py | 4 +- .../source_scaffold_source_http/spec.json | 6 +- docs/tutorials/slack-history/index.html | 1 + 33 files changed, 1604 insertions(+), 25 deletions(-) create mode 100644 airbyte-integrations/bases/base-python/docs/tutorials/http_api_source.md create mode 100644 airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/configured_catalog.json create mode 100644 
airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/exchange_rates.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/.dockerignore create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/README.md create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/build.gradle create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/main_dev.py create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/requirements.txt create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/sample_files/config.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/sample_files/configured_catalog.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/sample_files/invalid_config.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/sample_files/state.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/setup.py create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/__init__.py create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/TODO.md create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/customers.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/employees.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/exchange_rates.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/source.py create mode 100644 
airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/spec.json create mode 100644 airbyte-integrations/connectors/source-python-http-tutorial/unit_tests/unit_tests.py diff --git a/airbyte-integrations/bases/base-python/CDK-README.md b/airbyte-integrations/bases/base-python/CDK-README.md index f72a2fd6894..f0ac469dbee 100644 --- a/airbyte-integrations/bases/base-python/CDK-README.md +++ b/airbyte-integrations/bases/base-python/CDK-README.md @@ -1,16 +1,14 @@ # Airbyte Connector Development Kit (CDK) -The Airbyte Python CDK is a framework for fast development of production-grade Airbyte connectors. +The Airbyte Python CDK is a framework for rapidly developing production-grade Airbyte connectors. The CDK currently offers helpers specific for creating Airbyte source connectors for: * HTTP APIs (REST APIs, GraphQL, etc..) * Singer Taps * Generic Python sources (anything not covered by the above) -It provides an improved developer experience by providing basic implementation structure and abstracting away -low-level glue boilerplate. The CDK aims to make implementing a Source as simple as possible - -reading the Source's API, and filling in a few Python function should be all that is needed. -This document is a general introduction to the CDK. Readers should be familiar with the above linked Airbyte -Specification before proceeding. +The CDK provides an improved developer experience by providing basic implementation structure and abstracting away low-level glue boilerplate. + +This document is a general introduction to the CDK. Readers should have basic familiarity with the [Airbyte Specification](https://docs.airbyte.io/architecture/airbyte-specification) before proceeding. 
### The Airbyte Specification As a quick recap, the Airbyte Specification requires an Airbyte Source to support 4 distinct operations: diff --git a/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source.md b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source.md new file mode 100644 index 00000000000..cf8a05c7b18 --- /dev/null +++ b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source.md @@ -0,0 +1,526 @@ +# Building a Python Source for an HTTP API + +## Summary + +This is a step-by-step guide for how to create an Airbyte source in Python to read data from an HTTP API. We'll be using the +Exchangerates API as an example since it is both simple and demonstrates a lot of the capabilities of the CDK. + +## Requirements + +* Python >= 3.7 +* Docker +* NodeJS (only used to generate the connector). We'll remove the NodeJS dependency soon. + +All the commands below assume that `python` points to a version of python >=3.7.9. On some systems, `python` points to a Python2 installation and `python3` points to Python3. If this is the case on your machine, substitute all `python` commands in this guide with `python3`. + +## Checklist +* Step 1: Create the source using the template +* Step 2: Install dependencies for the newly generated source +* Step 3: Define the inputs needed by your connector +* Step 4: Implement connection checking +* Step 5: Declare the schema of your streams +* Step 6: Implement functionality for reading your streams +* Step 7: Use the connector in Airbyte +* Step 8: Write unit tests or integration tests + +Each step of the Creating a Source checklist is explained in more detail below. We also mention how you can submit the connector to ship by default with Airbyte at the end of the tutorial. + +## Explaining Each Step + +### Step 1: Create the source using the template + +Airbyte provides a code generator which bootstraps the scaffolding for our connector. 
+ +```bash +$ cd airbyte-integrations/connector-templates/generator # assumes you are starting from the root of the Airbyte project. +# Install NPM from https://www.npmjs.com/get-npm if you don't have it +$ npm install +$ npm run generate +``` + +Select the `Python HTTP CDK Source` template and then input the name of your connector. For this walk through we will refer to our source as `python-http-example`. The finalized source code for this tutorial can be found [here](https://github.com/airbytehq/airbyte/tree/master/airbyte-integrations/connectors/source-python-http-tutorial). + +The source we will build in this tutorial will pull data from the [Rates API](https://ratesapi.io), a free and open API which +documents historical exchange rates for fiat currencies. + +### Step 2: Install dependencies for the newly generated source +Now that you've generated the module, let's navigate to its directory and install dependencies: + +```text +cd ../../connectors/source-<name> +python -m venv .venv # Create a virtual environment in the .venv directory +source .venv/bin/activate # enable the venv +pip install -r requirements.txt +``` + +This step sets up the initial python environment. **All** subsequent `python` or `pip` commands assume you have activated your virtual environment. + +Let's verify everything is working as intended. Run: + +``` +python main_dev.py spec +``` + +You should see some output: +``` +{"type": "SPEC", "spec": {"documentationUrl": "https://docsurl.com", "connectionSpecification": {"$schema": "http://json-schema.org/draft-07/schema#", "title": "Python Http Tutorial Spec", "type": "object", "required": ["TODO"], "additionalProperties": false, "properties": {"TODO: This schema defines the configuration required for the source. This usually involves metadata such as database and/or authentication information.": {"type": "string", "description": "describe me"}}}}} +``` + +We just ran the `spec` command of the Airbyte Protocol! 
We'll talk more about this later, but this is a simple sanity check to make sure everything is wired up correctly. + +Note that the `main_dev.py` file is a simple script that makes it easy to run your connector. Its invocation format is `python main_dev.py <command> [args]`. See the module's generated `README.md` for the commands it supports. + +### Notes on iteration cycle +#### Dependencies + +Python dependencies for your source should be declared in `airbyte-integrations/connectors/source-<name>/setup.py` in the `install_requires` field. You will notice that a couple of Airbyte dependencies are already declared there. Do not remove these; they give your source access to the helper interface that is provided by the generator. + +You may notice that there is a `requirements.txt` in your source's directory as well. Don't edit this. It is autogenerated and used to provide Airbyte dependencies. All your dependencies should be declared in `setup.py`. + +#### Development Environment +The commands we ran above created a [Python virtual environment](https://docs.python.org/3/tutorial/venv.html) for your source. If you want your IDE to auto-complete and resolve dependencies properly, point it at the virtual env `airbyte-integrations/connectors/source-<name>/.venv`. Also, anytime you change the dependencies in the `setup.py` make sure to re-run `pip install -r requirements.txt`. + +#### Iterating on your implementation +Everyone develops differently but here are 2 ways that we recommend iterating on a source. Consider using whichever one matches your style. + +**Run the source using python** + +You'll notice in your source's directory that there is a python file called `main_dev.py`. This file exists as a convenience for development. 
You run it to test that your source works: + +```text +# from airbyte-integrations/connectors/source-<name> +python main_dev.py spec +python main_dev.py check --config secrets/config.json +python main_dev.py discover --config secrets/config.json +python main_dev.py read --config secrets/config.json --catalog sample_files/configured_catalog.json +``` + +The nice thing about this approach is that you can iterate completely within python. The downside is that you are not quite running your source as it will actually be run by Airbyte. Specifically you're not running it from within the docker container that will house it. + +**Run the source using docker** + +If you want to run your source exactly as it will be run by Airbyte \(i.e. within a docker container\), you can use the following commands from the connector module directory \(`airbyte-integrations/connectors/source-python-http-example`\): + +```text +# First build the container +docker build . -t airbyte/source-<name>:dev + +# Then use the following commands to run it +docker run --rm airbyte/source-python-http-example:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-python-http-example:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-python-http-example:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/sample_files:/sample_files airbyte/source-python-http-example:dev read --config /secrets/config.json --catalog /sample_files/configured_catalog.json +``` + +Note: Each time you make a change to your implementation you need to re-build the connector image: `docker build . -t airbyte/source-<name>:dev`. This ensures the new python code is added into the docker container. + +The nice thing about this approach is that you are running your source exactly as it will be run by Airbyte. The tradeoff is that iteration is slightly slower, because you need to re-build the connector between each change. 
+ +### Step 3: Define the inputs required by your connector + +Each connector declares the inputs it needs to read data from the underlying data source. In the Airbyte Protocol terminology, this is the `spec` operation. + +The simplest way to implement this is by creating a `.json` file in `source_<name>/spec.json` which describes your connector's inputs according to the [ConnectorSpecification](https://github.com/airbytehq/airbyte/blob/master/airbyte-protocol/models/src/main/resources/airbyte_protocol/airbyte_protocol.yaml) schema. This is a good place to start when developing your source. Using JsonSchema, define what the inputs are \(e.g. username and password\). Here's [an example](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-freshdesk/source_freshdesk/spec.json) of what the `spec.json` looks like for the Freshdesk API source. + +For more details on what the spec is, you can read about the Airbyte Protocol [here](https://docs.airbyte.io/architecture/airbyte-specification). + +The generated code that Airbyte provides handles implementing the `spec` method for you. It assumes that there will be a file called `spec.json` in the same directory as `source.py`. If you have declared the necessary JsonSchema in `spec.json` you should be done with this step. 
+ +Given that we'll be pulling currency data for our example source, we'll define the following `spec.json`: + +``` +{ + "documentationUrl": "https://docs.airbyte.io/integrations/sources/exchangeratesapi", + "connectionSpecification": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Python Http Tutorial Spec", + "type": "object", + "required": ["start_date", "base"], + "additionalProperties": false, + "properties": { + "start_date": { + "type": "string", + "description": "Start getting data from that date.", + "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$", + "examples": ["%Y-%m-%d"] + }, + "base": { + "type": "string", + "examples": ["USD", "EUR"], + "description": "ISO reference currency. See here." + } + } + } +} +``` + +In addition to metadata, we define two inputs: +* `start_date`: The beginning date to start tracking currency exchange rates from +* `base`: The currency whose rates we're interested in tracking + +### Step 4: Implement connection checking +The second operation in the Airbyte Protocol that we'll implement is the `check` operation. + +This operation verifies that the input configuration supplied by the user can be used to connect to the underlying data source. Note that this user-supplied configuration has the values described in the `spec.json` filled in. In other words if the `spec.json` said that the source requires a `username` and `password` the config object might be `{ "username": "airbyte", "password": "password123" }`. You should then implement the check so that it returns a JSON object that reports, given the credentials in the config, whether we were able to connect to the source. + + +In our case, this is a fairly trivial check since the API requires no credentials. Instead, let's verify that the user-input `base` currency is a legitimate currency. 
In `source.py` we'll find the following autogenerated source: + +```python +class SourcePythonHttpTutorial(AbstractSource): + + def check_connection(self, logger, config) -> Tuple[bool, any]: + """ + TODO: Implement a connection check to validate that the user-provided config can be used to connect to the underlying API + + See https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/source.py#L232 + for an example. + + :param config: the user-input config object conforming the connector's spec.json + :param logger: logger object + :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. + """ + return True, None + +... +``` + +Following the docstring instructions, we'll change the implementation to verify that the input currency is a real currency: + +```python + def check_connection(self, logger, config) -> Tuple[bool, any]: + accepted_currencies = {"USD", "JPY", "BGN", "CZK", "DKK"} # assume these are the only allowed currencies + input_currency = config['base'] + if input_currency not in accepted_currencies: + return False, f"Input currency {input_currency} is invalid. 
Please input one of the following currencies: {accepted_currencies}" + else: + return True, None +``` + +Let's test out this implementation by creating two config files, one valid and one invalid, and passing each of them as input to the connector: + +``` +echo '{"start_date": "2021-04-01", "base": "USD"}' > sample_files/config.json +echo '{"start_date": "2021-04-01", "base": "BTC"}' > sample_files/invalid_config.json +python main_dev.py check --config sample_files/config.json +python main_dev.py check --config sample_files/invalid_config.json +``` + +You should see output like the following: + +``` +> python main_dev.py check --config sample_files/config.json +{"type": "CONNECTION_STATUS", "connectionStatus": {"status": "SUCCEEDED"}} + +> python main_dev.py check --config sample_files/invalid_config.json +{"type": "CONNECTION_STATUS", "connectionStatus": {"status": "FAILED", "message": "Input currency BTC is invalid. Please input one of the following currencies: {'DKK', 'USD', 'CZK', 'BGN', 'JPY'}"}} +``` + +While developing, we recommend storing configs which contain secrets in `secrets/config.json` because the `secrets` directory is gitignored by default. + +### Step 5: Declare the schema of your streams + +The `discover` method of the Airbyte Protocol returns an `AirbyteCatalog`: an object which declares all the streams output by a connector and their schemas. It also declares the sync modes supported by the stream (full refresh or incremental). See the [catalog tutorial](https://docs.airbyte.io/tutorials/beginners-guide-to-catalog) for more information. + +When using the Airbyte CDK, this is very simple to do. For each stream in our connector we'll need to: +1. Create a python `class` in `source.py` which extends `HttpStream` +2. Place a `.json` file in the `source_<name>/schemas/` directory. The name of the file should be the snake_case name of the stream whose schema it describes, and its contents should be the JsonSchema describing the output from that stream. 
+ +Let's create a class in `source.py` which extends `HttpStream`. You'll notice there are classes with extensive comments describing what needs to be done to implement various connector features. Feel free to read these classes as needed. But for the purposes of this tutorial, let's assume that we are adding classes from scratch either by deleting those generated classes or editing them to match the implementation below. + +We'll begin by creating a stream to represent the data that we're pulling from the Exchange Rates API: +```python +class ExchangeRates(HttpStream): + url_base = "https://api.ratesapi.io/" + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + # The API does not offer pagination, so we return None to indicate there are no more pages in the response + return None + + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + return "" # TODO + + def parse_response( + self, + response: requests.Response, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Iterable[Mapping]: + return None # TODO + +``` + +Note that this implementation is entirely empty -- we haven't actually done anything. We'll come back to this in the next step. But for now we just want to declare the schema of this stream. We'll declare this as a stream that the connector outputs by returning it from the `streams` method: + +```python +class SourcePythonHttpTutorial(AbstractSource): + + def check_connection(self, logger, config) -> Tuple[bool, any]: + ... + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + # NoAuth just means there is no authentication required for this API. It's only included for completeness + # of the example, but if you don't need authentication, you don't need to pass an authenticator at all. 
+ # Other authenticators are available for API token-based auth and Oauth2. + auth = NoAuth() + return [ExchangeRates(authenticator=auth)] + +``` + +Having created this stream in code, we'll put a file `exchange_rates.json` in the `schemas/` folder. You can download the JSON file describing the output schema [here](http_api_source_assets/exchange_rates.json) for convenience and place it in `schemas/`. + +With `.json` schema file in place, let's see if the connector can now find this schema and produce a valid catalog: + +``` +python main_dev.py discover --config sample_files/config.json +``` + +you should see some output like: +``` +{"type": "CATALOG", "catalog": {"streams": [{"name": "exchange_rates", "json_schema": {"$schema": "http://json-schema.org/draft-04/schema#", "type": "object", "properties": {"base": {"type": "string"}, "rates": {"type": "object", "properties": {"GBP": {"type": "number"}, "HKD": {"type": "number"}, "IDR": {"type": "number"}, "PHP": {"type": "number"}, "LVL": {"type": "number"}, "INR": {"type": "number"}, "CHF": {"type": "number"}, "MXN": {"type": "number"}, "SGD": {"type": "number"}, "CZK": {"type": "number"}, "THB": {"type": "number"}, "BGN": {"type": "number"}, "EUR": {"type": "number"}, "MYR": {"type": "number"}, "NOK": {"type": "number"}, "CNY": {"type": "number"}, "HRK": {"type": "number"}, "PLN": {"type": "number"}, "LTL": {"type": "number"}, "TRY": {"type": "number"}, "ZAR": {"type": "number"}, "CAD": {"type": "number"}, "BRL": {"type": "number"}, "RON": {"type": "number"}, "DKK": {"type": "number"}, "NZD": {"type": "number"}, "EEK": {"type": "number"}, "JPY": {"type": "number"}, "RUB": {"type": "number"}, "KRW": {"type": "number"}, "USD": {"type": "number"}, "AUD": {"type": "number"}, "HUF": {"type": "number"}, "SEK": {"type": "number"}}}, "date": {"type": "string"}}}, "supported_sync_modes": ["full_refresh"]}]}} +``` + +it's that simple! Now the connector knows how to declare the schema of the stream in your connector. 
Our source is simple so we're only declaring one stream, but the principle is exactly the same if you had many streams. + +You can also dynamically define schemas, but that's beyond the scope of this tutorial. See the [schema docs](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/bases/base-python/docs/schemas.md) for more information. + +### Step 6: Read data from the API +Describing schemas is good and all, but at some point we have to start reading data! So let's get to work. But first, let's describe what we're about to do: + +The `HttpStream` superclass, as described in the [concepts documentation](../../CDK-README.md), facilitates reading data from HTTP endpoints. It contains built-in functions or helpers for: +* authentication +* pagination +* handling rate limiting or transient errors +* and other useful functionality + +In order for it to be able to do this, we have to provide it with a few inputs: +* the URL base and path of the endpoint we'd like to hit +* how to parse the response from the API +* how to perform pagination + +Optionally, we can provide additional inputs to customize requests: +* request parameters and headers +* how to recognize rate limit errors, and how long to wait (by default it retries 429 and 5XX errors using exponential backoff) +* HTTP method and request body if applicable + +There are many other customizable options - you can find them in the [`base_python.cdk.streams.http.HttpStream`](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/bases/base-python/base_python/cdk/streams/http.py) class. + +So in order to read data from the exchange rates API, we'll fill out the necessary information for the stream to do its work. First, we'll implement a basic read that just reads the last day's exchange rates, then we'll implement incremental sync using stream slicing. 
+ +Let's begin by pulling data for the last day's rates by using the `/latest` endpoint: + +```python +class ExchangeRates(HttpStream): + url_base = "https://api.ratesapi.io/" + + def __init__(self, base: str, **kwargs): + super().__init__() + self.base = base + + + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + # The "/latest" path gives us the latest currency exchange rates + return "latest" + + def request_params( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> MutableMapping[str, Any]: + # The api requires that we include the base currency as a query param so we do that in this method + return {'base': self.base} + + def parse_response( + self, + response: requests.Response, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Iterable[Mapping]: + # The response is a simple JSON whose schema matches our stream's schema exactly, + # so we just return a list containing the response + return [response.json()] + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + # The API does not offer pagination, + # so we return None to indicate there are no more pages in the response + return None + +``` + +This may look big, but that's just because there are lots of (unused, for now) parameters in these methods (those can be hidden with Python's `**kwargs`, but don't worry about it for now). Really we just added a few lines of "significant" code: +0. Added a constructor `__init__` which stores the `base` currency to query for. +1. `return {'base': self.base}` to add the `?base=` query parameter to the request based on the `base` input by the user +2. `return [response.json()]` to parse the response from the API to match the schema of our schema `.json` file +3. 
`return "latest"` to indicate that we want to hit the `/latest` endpoint of the API to get the latest exchange rate data. + +Let's also pass the `base` parameter input by the user to the stream class: + +```python +def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = NoAuth() + return [ExchangeRates(authenticator=auth, base=config['base'])] +``` + +We're now ready to query the API! + +To do this, we'll need a [ConfiguredCatalog](https://docs.airbyte.io/tutorials/beginners-guide-to-catalog). We've prepared one [here](http_api_source_assets/configured_catalog.json) -- download this and place it in `sample_files/configured_catalog.json`. Then run: + +``` + python main_dev.py read --config sample_files/config.json --catalog sample_files/configured_catalog.json +``` + +you should see some output lines, one of which is a record from the API: + +``` +{"type": "RECORD", "record": {"stream": "exchange_rates", "data": {"base": "USD", "rates": {"GBP": 0.7196938353, "HKD": 7.7597848573, "IDR": 14482.4824162185, "ILS": 3.2412081092, "DKK": 6.1532478279, "INR": 74.7852709971, "CHF": 0.915763343, "MXN": 19.8439387671, "CZK": 21.3545717832, "SGD": 1.3261894911, "THB": 31.4398014067, "HRK": 6.2599917253, "EUR": 0.8274720728, "MYR": 4.0979726934, "NOK": 8.3043442284, "CNY": 6.4856433595, "BGN": 1.61836988, "PHP": 48.3516756309, "PLN": 3.770872983, "ZAR": 14.2690111709, "CAD": 1.2436905254, "ISK": 124.9482829954, "BRL": 5.4526272238, "RON": 4.0738932561, "NZD": 1.3841125362, "TRY": 8.3101365329, "JPY": 108.0182043856, "RUB": 74.9555647497, "KRW": 1111.7583781547, "USD": 1.0, "AUD": 1.2840711626, "HUF": 300.6206040546, "SEK": 8.3829540753}, "date": "2021-04-26"}, "emitted_at": 1619498062000}} +``` + +There we have it - a stream which reads data in just a few lines of code! + +We theoretically _could_ stop here and call it a connector. But let's add incremental sync before we do that. + +#### Adding incremental sync +To add incremental sync, we'll do a few things: +1. 
Pass the `start_date` param input by the user into the stream +2. Declare the stream's `cursor_field` +3. Implement the `get_updated_state` method +4. Implement the `stream_slices` method +5. Update the `path` method to specify the date to pull exchange rates for +6. Update the configured catalog to use `incremental` sync when we're testing the stream + +We'll describe what each of these methods does below. Before we begin, it may help to familiarize yourself with how incremental sync works in Airbyte by reading the [docs on incremental](https://docs.airbyte.io/architecture/connections/incremental-append). + +To keep things concise, we'll only show functions as we edit them one by one. + +Let's get the easy parts out of the way and pass the `start_date`: + +```python +def streams(self, config: Mapping[str, Any]) -> List[Stream]: + auth = NoAuth() + # Parse the date from a string into a datetime object + start_date = datetime.strptime(config['start_date'], '%Y-%m-%d') + return [ExchangeRates(authenticator=auth, base=config['base'], start_date=start_date)] +``` + +Let's also add this parameter to the constructor and declare the `cursor_field`: + +```python +from datetime import datetime, timedelta + + +class ExchangeRates(HttpStream): + url_base = "https://api.ratesapi.io/" + cursor_field = "date" + + def __init__(self, base: str, start_date: datetime, **kwargs): + super().__init__() + self.base = base + self.start_date = start_date +``` + +Declaring the `cursor_field` informs the framework that this stream now supports incremental sync. The next time you run `python main_dev.py discover --config sample_files/config.json` you'll find that the `supported_sync_modes` field now also contains `incremental`. + +But we're not quite done with supporting incremental, we have to actually emit state! 
We'll structure our state object very simply: it will be a `dict` whose single key is `'date'` and value is the date of the last day we synced data from e.g: `{'date': '2021-04-26'}` indicates the connector previously read data up until April 26th and therefore shouldn't re-read anything before April 26th. + + Let's do this by implementing the `get_updated_state` method inside the `ExchangeRates` class. + +```python + def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, any]: + # This method is called once for each record returned from the API to compare the cursor field value in that record with the current state + # we then return an updated state object. If this is the first time we run a sync or no state was passed, current_stream_state will be None. + if current_stream_state is not None and 'date' in current_stream_state: + current_parsed_date = datetime.strptime(current_stream_state['date'], '%Y-%m-%d') + latest_record_date = datetime.strptime(latest_record['date'], '%Y-%m-%d') + return {'date': max(current_parsed_date, latest_record_date).strftime('%Y-%m-%d')} + else: + return {'date': self.start_date.strftime('%Y-%m-%d')} +``` + +This implementation compares the date from the latest record with the date in the current state and takes the maximum as the "new" state object. + +We'll implement the `stream_slices` method to return a list of the dates for which we should pull data based on the stream state if it exists: + +```python + def _chunk_date_range(self, start_date: datetime) -> List[Mapping[str, any]]: + """ + Returns a list of each day between the start date and now. + The return value is a list of dicts {'date': date_string}. 
+ """ + dates = [] + while start_date < datetime.now(): + dates.append({'date': start_date.strftime('%Y-%m-%d')}) + start_date += timedelta(days=1) + return dates + + def stream_slices(self, sync_mode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None) -> Iterable[ + Optional[Mapping[str, any]]]: + start_date = datetime.strptime(stream_state['date'], '%Y-%m-%d') if stream_state and 'date' in stream_state else self.start_date + return self._chunk_date_range(start_date) +``` + +Each slice will cause an HTTP request to be made to the API. We can then use the information present in the `stream_slice` parameter (a single element from the list we constructed in `stream_slices` above) to set other configurations for the outgoing request like `path` or `request_params`. For more info about stream slicing, see [the slicing docs](../concepts/stream_slices.md). + +In order to pull data for a specific date, the Exchange Rates API requires that we pass the date as the path component of the URL. Let's override the `path` method to achieve this: +```python +def path(self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None) -> str: + return stream_slice['date'] +``` + +With these changes, your implementation should look like the file [here](https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/source.py). + +The last thing we need to do is change the `sync_mode` field in the `sample_files/configured_catalog.json` to `incremental`: +``` +"sync_mode": "incremental", +``` + +We should now have a working implementation of incremental sync! Let's try it out: + +``` +python main_dev.py read --config sample_files/config.json --catalog sample_files/configured_catalog.json +``` + +You should see a bunch of `RECORD` messages and `STATE` messages. 
To verify that incremental sync is working, pass the latest state back to the connector as input and run it again: +``` +# Save the latest state to sample_files/state.json +python main_dev.py read --config sample_files/config.json --catalog sample_files/configured_catalog.json | grep STATE | tail -n 1 | jq .state.data > sample_files/state.json + +# Run a read operation with the latest state message +python main_dev.py read --config sample_files/config.json --catalog sample_files/configured_catalog.json --state sample_files/state.json +``` + +You should see that only the record from the last date is being synced! This is acceptable behavior, since Airbyte requires at-least-once delivery of records, so emitting the last record a second time is OK. + +With that, we've implemented incremental sync for our connector! + +### Step 7: Use the connector in Airbyte +To use your connector in your own installation of Airbyte, build the Docker image for your connector by running `docker build . -t airbyte/source-python-http-example:dev`. Then, follow the instructions from the [building a toy source tutorial](https://docs.airbyte.io/tutorials/toy-connector#use-the-connector-in-the-airbyte-ui) for using the connector in the Airbyte UI, replacing the name as appropriate. + +Note: your built Docker image must be accessible to the `docker` daemon running on the Airbyte node. If you're doing this tutorial locally, then the instructions here are sufficient. Otherwise you may need to push your Docker image to Docker Hub. + +### Step 8: Test your connector +#### Unit Tests +Add any relevant unit tests to the `unit_tests` directory. Unit tests should _not_ depend on any secrets. + +You can run the tests using `python -m pytest -s unit_tests` + +#### Integration Tests +Place any integration tests in the `integration_tests` directory such that they can be [discovered by pytest](https://docs.pytest.org/en/reorganize-docs/new-docs/user/naming_conventions.html). 
+ +#### Standard Tests +Standard tests are a fixed set of tests Airbyte provides that every Airbyte source connector must pass. While they're only required if you intend to submit your connector to Airbyte, you might find them helpful in any case. See [Testing your connectors](https://docs.airbyte.io/contributing-to-airbyte/building-new-connector/testing-connectors) + +If you want to submit this connector to become a default connector within Airbyte, follow +steps 8 onwards from the [Python source checklist](https://docs.airbyte.io/tutorials/building-a-python-source#step-8-set-up-standard-tests) diff --git a/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/configured_catalog.json b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/configured_catalog.json new file mode 100644 index 00000000000..66ab9be9e7b --- /dev/null +++ b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/configured_catalog.json @@ -0,0 +1,131 @@ +{ + "streams": [ + { + "stream": { + "name": "exchange_rates", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "rates": { + "type": "object", + "properties": { + "GBP": { + "type": "number" + }, + "HKD": { + "type": "number" + }, + "IDR": { + "type": "number" + }, + "PHP": { + "type": "number" + }, + "LVL": { + "type": "number" + }, + "INR": { + "type": "number" + }, + "CHF": { + "type": "number" + }, + "MXN": { + "type": "number" + }, + "SGD": { + "type": "number" + }, + "CZK": { + "type": "number" + }, + "THB": { + "type": "number" + }, + "BGN": { + "type": "number" + }, + "EUR": { + "type": "number" + }, + "MYR": { + "type": "number" + }, + "NOK": { + "type": "number" + }, + "CNY": { + "type": "number" + }, + "HRK": { + "type": "number" + }, + "PLN": { + "type": "number" + }, + "LTL": { + "type": "number" + }, + "TRY": { + "type": "number" + }, + "ZAR": { + "type": 
"number" + }, + "CAD": { + "type": "number" + }, + "BRL": { + "type": "number" + }, + "RON": { + "type": "number" + }, + "DKK": { + "type": "number" + }, + "NZD": { + "type": "number" + }, + "EEK": { + "type": "number" + }, + "JPY": { + "type": "number" + }, + "RUB": { + "type": "number" + }, + "KRW": { + "type": "number" + }, + "USD": { + "type": "number" + }, + "AUD": { + "type": "number" + }, + "HUF": { + "type": "number" + }, + "SEK": { + "type": "number" + } + } + }, + "date": { + "type": "string" + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/exchange_rates.json b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/exchange_rates.json new file mode 100644 index 00000000000..7476b088094 --- /dev/null +++ b/airbyte-integrations/bases/base-python/docs/tutorials/http_api_source_assets/exchange_rates.json @@ -0,0 +1,116 @@ +{ + "type": "object", + "required": ["base", "date", "rates"], + "properties": { + "base": { + "type": "string" + }, + "date": { + "type": "string" + }, + "rates": { + "type": "object", + "properties": { + "CAD": { + "type": ["null", "number"] + }, + "HKD": { + "type": ["null", "number"] + }, + "ISK": { + "type": ["null", "number"] + }, + "PHP": { + "type": ["null", "number"] + }, + "DKK": { + "type": ["null", "number"] + }, + "HUF": { + "type": ["null", "number"] + }, + "CZK": { + "type": ["null", "number"] + }, + "GBP": { + "type": ["null", "number"] + }, + "RON": { + "type": ["null", "number"] + }, + "SEK": { + "type": ["null", "number"] + }, + "IDR": { + "type": ["null", "number"] + }, + "INR": { + "type": ["null", "number"] + }, + "BRL": { + "type": ["null", "number"] + }, + "RUB": { + "type": ["null", "number"] + }, + "HRK": { + "type": ["null", "number"] + }, + "JPY": { + "type": ["null", "number"] + }, + "THB": { + "type": 
["null", "number"] + }, + "CHF": { + "type": ["null", "number"] + }, + "EUR": { + "type": ["null", "number"] + }, + "MYR": { + "type": ["null", "number"] + }, + "BGN": { + "type": ["null", "number"] + }, + "TRY": { + "type": ["null", "number"] + }, + "CNY": { + "type": ["null", "number"] + }, + "NOK": { + "type": ["null", "number"] + }, + "NZD": { + "type": ["null", "number"] + }, + "ZAR": { + "type": ["null", "number"] + }, + "USD": { + "type": ["null", "number"] + }, + "MXN": { + "type": ["null", "number"] + }, + "SGD": { + "type": ["null", "number"] + }, + "AUD": { + "type": ["null", "number"] + }, + "ILS": { + "type": ["null", "number"] + }, + "KRW": { + "type": ["null", "number"] + }, + "PLN": { + "type": ["null", "number"] + } + } + } + } +} diff --git a/airbyte-integrations/connector-templates/generator/build.gradle b/airbyte-integrations/connector-templates/generator/build.gradle index b7cf2163c5d..3ffd37f98a0 100644 --- a/airbyte-integrations/connector-templates/generator/build.gradle +++ b/airbyte-integrations/connector-templates/generator/build.gradle @@ -39,6 +39,6 @@ def addScaffoldTemplateTask(name, packageName,scaffoldParams=[]) { } addScaffoldTemplateTask('Python Source', 'scaffold-source-python') -addScaffoldTemplateTask('Python HTTP CDK Source', 'scaffold-source-http') +addScaffoldTemplateTask('Python HTTP API Source', 'scaffold-source-http') // TODO: enable Singer template testing //addScaffoldTask('source-python-singer', ['tap-exchangeratesapi']) diff --git a/airbyte-integrations/connector-templates/generator/plopfile.js b/airbyte-integrations/connector-templates/generator/plopfile.js index b6a76f552c7..a05a3bb6e96 100644 --- a/airbyte-integrations/connector-templates/generator/plopfile.js +++ b/airbyte-integrations/connector-templates/generator/plopfile.js @@ -37,8 +37,8 @@ module.exports = function (plop) { console.log(getSuccessMessage(answers.name, plopApi.renderString(config.outputPath, answers), config.message)); }); - 
plop.setGenerator('Python HTTP CDK Source', { - description: 'Generate a Source that pulls data from a synchronous HTTP API built on the Airbyte CDK.', + plop.setGenerator('Python HTTP API Source', { + description: 'Generate a Source that pulls data from a synchronous HTTP API.', prompts: [{type: 'input', name: 'name', message: 'Source name e.g: "google-analytics"'}], actions: [ { diff --git a/airbyte-integrations/connector-templates/source-python-http-api/setup.py.hbs b/airbyte-integrations/connector-templates/source-python-http-api/setup.py.hbs index bc7a448037e..685b6a2a5d7 100644 --- a/airbyte-integrations/connector-templates/source-python-http-api/setup.py.hbs +++ b/airbyte-integrations/connector-templates/source-python-http-api/setup.py.hbs @@ -31,5 +31,5 @@ setup( author_email="contact@airbyte.io", packages=find_packages(), install_requires=["airbyte-protocol", "base-python", "pytest==6.1.2"], - package_data={"": ["*.json"]} + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]} ) diff --git a/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/source.py.hbs b/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/source.py.hbs index 8a7eccc4fd9..032c9e15beb 100644 --- a/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/source.py.hbs +++ b/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/source.py.hbs @@ -28,7 +28,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple import requests from base_python import AbstractSource, HttpStream, Stream -from base_python.cdk.streams.auth.core import TokenAuthenticator +from base_python.cdk.streams.auth.token import TokenAuthenticator """ TODO: Most comments in this class are instructive and should be deleted after the source is implemented. 
@@ -197,7 +197,7 @@ class Source{{properCase name}}(AbstractSource): See https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/source.py#L232 for an example. - :param config: the user-input config object conforming the connector's spec.json + :param config: the user-input config object conforming to the connector's spec.json :param logger: logger object :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. """ diff --git a/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/spec.json.hbs b/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/spec.json.hbs index 7ad9a9e5eb4..a0ad81e0a3c 100644 --- a/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/spec.json.hbs +++ b/airbyte-integrations/connector-templates/source-python-http-api/source_{{snakeCase name}}/spec.json.hbs @@ -1,15 +1,13 @@ -// TODO: This schema defines the configuration required for the source. This usually involves metadata such as database and/or authentication information. -// Delete this comment after reading. { "documentationUrl": "https://docsurl.com", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "{{titleCase name}} Spec", "type": "object", - "required": ["fix-me"], + "required": ["TODO"], "additionalProperties": false, "properties": { - "fix-me": { + "TODO: This schema defines the configuration required for the source. 
This usually involves metadata such as database and/or authentication information.": { "type": "string", "description": "describe me" } diff --git a/airbyte-integrations/connectors/source-exchange-rates/source_exchange_rates/spec.json b/airbyte-integrations/connectors/source-exchange-rates/source_exchange_rates/spec.json index bf6803670be..f7273d81d8d 100644 --- a/airbyte-integrations/connectors/source-exchange-rates/source_exchange_rates/spec.json +++ b/airbyte-integrations/connectors/source-exchange-rates/source_exchange_rates/spec.json @@ -1,10 +1,10 @@ { - "documentationUrl": "https://docs.airbyte.io/integrations/sources/ratesapi-io", + "documentationUrl": "https://docs.airbyte.io/integrations/sources/exchangeratesapi", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "ratesapi.io Source Spec", "type": "object", - "required": ["start_date", "base"], + "required": ["start_date", "currency_base"], "additionalProperties": false, "properties": { "start_date": { diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/.dockerignore b/airbyte-integrations/connectors/source-python-http-tutorial/.dockerignore new file mode 100644 index 00000000000..6eed88c0102 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/.dockerignore @@ -0,0 +1,6 @@ +* +!Dockerfile +!Dockerfile.test +!source_python_http_tutorial +!setup.py +!secrets diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile b/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile new file mode 100644 index 00000000000..e1dcf562106 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/Dockerfile @@ -0,0 +1,16 @@ +FROM airbyte/integration-base-python:0.1.5 + +# Bash is installed for more convenient debugging. 
+RUN apt-get update && apt-get install -y bash && rm -rf /var/lib/apt/lists/* + +ENV CODE_PATH="source_python_http_tutorial" +ENV AIRBYTE_IMPL_MODULE="source_python_http_tutorial" +ENV AIRBYTE_IMPL_PATH="SourcePythonHttpTutorial" + +WORKDIR /airbyte/integration_code +COPY $CODE_PATH ./$CODE_PATH +COPY setup.py ./ +RUN pip install . + +LABEL io.airbyte.version=0.1.0 +LABEL io.airbyte.name=airbyte/source-python-http-tutorial diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/README.md b/airbyte-integrations/connectors/source-python-http-tutorial/README.md new file mode 100644 index 00000000000..775063433e0 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/README.md @@ -0,0 +1,100 @@ +# Python Http Tutorial Source + +This is the repository for the Python Http Tutorial source connector, written in Python. +For information about how to use this connector within Airbyte, see [the documentation](https://docs.airbyte.io/integrations/sources/python-http-tutorial). + +## Local development + +### Prerequisites +**To iterate on this connector, make sure to complete this prerequisites section.** + +#### Build & Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` + +This will generate a virtualenv for this module in `.venv/`. Make sure this venv is active in your +development environment of choice. To activate it from the terminal, run: +``` +source .venv/bin/activate +pip install -r requirements.txt +``` +If you are in an IDE, follow your IDE's instructions to activate the virtualenv. + +Note that while we are installing dependencies from `requirements.txt`, you should only edit `setup.py` for your dependencies. `requirements.txt` is +used for editable installs (`pip install -e`) to pull in Python dependencies from the monorepo and will call `setup.py`. 
+If this is mumbo jumbo to you, don't worry about it: just put your deps in `setup.py` but install using `pip install -r requirements.txt` and everything +should work as you expect. + +#### Building via Gradle +You can also build the connector in Gradle. This is typically used in CI and not needed for your development workflow. + +To build using Gradle, from the Airbyte repository root, run: +``` +./gradlew :airbyte-integrations:connectors:source-python-http-tutorial:build +``` + +#### Create credentials +**If you are a community contributor**, follow the instructions in the [documentation](https://docs.airbyte.io/integrations/sources/python-http-tutorial) +to generate the necessary credentials. Then create a file `secrets/config.json` conforming to the `source_python_http_tutorial/spec.json` file. +Note that any directory named `secrets` is gitignored across the entire Airbyte repo, so there is no danger of accidentally checking in sensitive information. +See `sample_files/config.json` for a sample config file. + +**If you are an Airbyte core member**, copy the credentials in Lastpass under the secret name `source python-http-tutorial test creds` +and place them into `secrets/config.json`. + + +### Locally running the connector +``` +python main_dev.py spec +python main_dev.py check --config secrets/config.json +python main_dev.py discover --config secrets/config.json +python main_dev.py read --config secrets/config.json --catalog sample_files/configured_catalog.json +``` + +### Unit Tests +To run unit tests locally, from the connector directory run: +``` +python -m pytest unit_tests +``` + +### Locally running the connector docker image + +#### Build +First, make sure you build the latest Docker image: +``` +docker build . 
-t airbyte/source-python-http-tutorial:dev +``` + +You can also build the connector image via Gradle: +``` +./gradlew :airbyte-integrations:connectors:source-python-http-tutorial:airbyteDocker +``` +When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in +the Dockerfile. + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-python-http-tutorial:dev spec +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-python-http-tutorial:dev check --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets airbyte/source-python-http-tutorial:dev discover --config /secrets/config.json +docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/sample_files:/sample_files airbyte/source-python-http-tutorial:dev read --config /secrets/config.json --catalog /sample_files/configured_catalog.json +``` + +### Integration Tests +1. From the airbyte project root, run `./gradlew :airbyte-integrations:connectors:source-python-http-tutorial:integrationTest` to run the standard integration test suite. +1. To run additional integration tests, place your integration tests in a new directory `integration_tests` and run them with `python -m pytest -s integration_tests`. + Make sure to familiarize yourself with [pytest test discovery](https://docs.pytest.org/en/latest/goodpractices.html#test-discovery) to know how your test files and methods should be named. + +## Dependency Management +All of your dependencies should go in `setup.py`, NOT `requirements.txt`. The requirements file is only used to connect internal Airbyte dependencies in the monorepo for local development. + +### Publishing a new version of the connector +You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what? +1. Make sure your changes are passing unit and integration tests +1. 
Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use SemVer). +1. Create a Pull Request +1. Pat yourself on the back for being an awesome contributor +1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/build.gradle b/airbyte-integrations/connectors/source-python-http-tutorial/build.gradle new file mode 100644 index 00000000000..0a28ccb37fa --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/build.gradle @@ -0,0 +1,32 @@ +plugins { + id 'airbyte-python' + id 'airbyte-docker' + id 'airbyte-standard-source-test-file' +} + +airbytePython { + moduleDirectory 'source_python_http_tutorial' +} + +airbyteStandardSourceTestFile { + // For more information on standard source tests, see https://docs.airbyte.io/contributing-to-airbyte/building-new-connector/testing-connectors + + // All these input paths must live inside this connector's directory (or subdirectories) + // TODO update the spec JSON file + specPath = "source_python_http_tutorial/spec.json" + + // configPath points to a config file which matches the spec.json supplied above. secrets/ is gitignored by default, so place your config file + // there (in case it contains any credentials) + // TODO update the config file to contain actual credentials + configPath = "secrets/config.json" + // TODO update the sample configured_catalog JSON for use in testing + // Note: If your source supports incremental syncing, then make sure that the catalog that is returned in the get_catalog method is configured + // for incremental syncing (e.g. include cursor fields, etc). 
+ configuredCatalogPath = "sample_files/configured_catalog.json" +} + + +dependencies { + implementation files(project(':airbyte-integrations:bases:base-standard-source-test-file').airbyteDocker.outputs) + implementation files(project(':airbyte-integrations:bases:base-python').airbyteDocker.outputs) +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/main_dev.py b/airbyte-integrations/connectors/source-python-http-tutorial/main_dev.py new file mode 100644 index 00000000000..656a00f5bff --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/main_dev.py @@ -0,0 +1,55 @@ +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ + +""" +MIT License + +Copyright (c) 2020 Airbyte + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +""" + +import sys + +from base_python.entrypoint import launch +from source_python_http_tutorial import SourcePythonHttpTutorial + +if __name__ == "__main__": + source = SourcePythonHttpTutorial() + launch(source, sys.argv[1:]) diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/requirements.txt b/airbyte-integrations/connectors/source-python-http-tutorial/requirements.txt new file mode 100644 index 00000000000..dd447512e62 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/requirements.txt @@ -0,0 +1,4 @@ +# This file is autogenerated -- only edit if you know what you are doing. Use setup.py for declaring dependencies. +-e ../../bases/airbyte-protocol +-e ../../bases/base-python +-e . 
diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/config.json b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/config.json new file mode 100644 index 00000000000..2e4bbdfb3b6 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/config.json @@ -0,0 +1 @@ +{ "start_date": "2021-04-01", "base": "USD" } diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/configured_catalog.json b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/configured_catalog.json new file mode 100644 index 00000000000..8c34f50528b --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/configured_catalog.json @@ -0,0 +1,131 @@ +{ + "streams": [ + { + "stream": { + "name": "exchange_rates", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "rates": { + "type": "object", + "properties": { + "GBP": { + "type": "number" + }, + "HKD": { + "type": "number" + }, + "IDR": { + "type": "number" + }, + "PHP": { + "type": "number" + }, + "LVL": { + "type": "number" + }, + "INR": { + "type": "number" + }, + "CHF": { + "type": "number" + }, + "MXN": { + "type": "number" + }, + "SGD": { + "type": "number" + }, + "CZK": { + "type": "number" + }, + "THB": { + "type": "number" + }, + "BGN": { + "type": "number" + }, + "EUR": { + "type": "number" + }, + "MYR": { + "type": "number" + }, + "NOK": { + "type": "number" + }, + "CNY": { + "type": "number" + }, + "HRK": { + "type": "number" + }, + "PLN": { + "type": "number" + }, + "LTL": { + "type": "number" + }, + "TRY": { + "type": "number" + }, + "ZAR": { + "type": "number" + }, + "CAD": { + "type": "number" + }, + "BRL": { + "type": "number" + }, + "RON": { + "type": "number" + }, + "DKK": { + "type": "number" + }, + "NZD": { + "type": "number" + }, + "EEK": { + "type": 
"number" + }, + "JPY": { + "type": "number" + }, + "RUB": { + "type": "number" + }, + "KRW": { + "type": "number" + }, + "USD": { + "type": "number" + }, + "AUD": { + "type": "number" + }, + "HUF": { + "type": "number" + }, + "SEK": { + "type": "number" + } + } + }, + "date": { + "type": "string" + } + } + }, + "supported_sync_modes": ["full_refresh"] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/invalid_config.json b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/invalid_config.json new file mode 100644 index 00000000000..779b9ee5d1e --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/invalid_config.json @@ -0,0 +1 @@ +{ "start_date": "2021-04-01", "base": "BTC" } diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/state.json b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/state.json new file mode 100644 index 00000000000..e0fe91325ce --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/sample_files/state.json @@ -0,0 +1,5 @@ +{ + "exchange_rates": { + "date": "2021-04-26" + } +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/setup.py b/airbyte-integrations/connectors/source-python-http-tutorial/setup.py new file mode 100644 index 00000000000..ebf0e13764d --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/setup.py @@ -0,0 +1,58 @@ +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit 
persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +""" +MIT License + +Copyright (c) 2020 Airbyte + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
from setuptools import find_packages, setup

# Packaging metadata for the tutorial connector.
#
# `package_data` globs are resolved relative to each package directory and do
# not recurse, so "*.json" alone only ships spec.json. The stream schemas live
# in the `schemas/` sub-directory (and shared `$ref` targets in
# `schemas/shared/`), so they need their own patterns; otherwise an installed
# (non-editable) package has no schema files to read.
setup(
    name="source_python_http_tutorial",
    description="Source implementation for Python Http Tutorial.",
    author="Airbyte",
    author_email="contact@airbyte.io",
    packages=find_packages(),
    install_requires=["airbyte-protocol", "base-python", "pytest==6.1.2"],
    package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]},
)
+""" + +from .source import SourcePythonHttpTutorial + +__all__ = ["SourcePythonHttpTutorial"] diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/TODO.md b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/TODO.md new file mode 100644 index 00000000000..cf1efadb3c9 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/TODO.md @@ -0,0 +1,25 @@ +# TODO: Define your stream schemas +Your connector must describe the schema of each stream it can output using [JSONSchema](https://json-schema.org). + +The simplest way to do this is to describe the schema of your streams using one `.json` file per stream. You can also dynamically generate the schema of your stream in code, or you can combine both approaches: start with a `.json` file and dynamically add properties to it. + +The schema of a stream is the return value of `Stream.get_json_schema`. + +## Static schemas +By default, `Stream.get_json_schema` reads a `.json` file in the `schemas/` directory whose name is equal to the value of the `Stream.name` property. In turn `Stream.name` by default returns the name of the class in snake case. Therefore, if you have a class `class EmployeeBenefits(HttpStream)` the default behavior will look for a file called `schemas/employee_benefits.json`. You can override any of these behaviors as you need. + +Important note: any objects referenced via `$ref` should be placed in the `shared/` directory in their own `.json` files. + +## Dynamic schemas +If you'd rather define your schema in code, override `Stream.get_json_schema` in your stream class to return a `dict` describing the schema using [JSONSchema](https://json-schema.org). 
+ +## Dynamically modifying static schemas +Override `Stream.get_json_schema` to run the default behavior, edit the returned value, then return the edited value: +``` +def get_json_schema(self): + schema = super().get_json_schema() + schema['dynamically_determined_property'] = "property" + return schema +``` + +Delete this file once you're done. Or don't. Up to you :) diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/customers.json b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/customers.json new file mode 100644 index 00000000000..9a4b1348583 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/customers.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "signup_date": { + "type": ["null", "string"], + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/employees.json b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/employees.json new file mode 100644 index 00000000000..2fa01a0fa1f --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/employees.json @@ -0,0 +1,19 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "years_of_service": { + "type": ["null", "integer"] + }, + "start_date": { + "type": ["null", "string"], + "format": "date-time" + } + } +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/exchange_rates.json 
b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/exchange_rates.json new file mode 100644 index 00000000000..80b47d0eeee --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/schemas/exchange_rates.json @@ -0,0 +1,119 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "base": { + "type": "string" + }, + "rates": { + "type": "object", + "properties": { + "GBP": { + "type": "number" + }, + "HKD": { + "type": "number" + }, + "IDR": { + "type": "number" + }, + "PHP": { + "type": "number" + }, + "LVL": { + "type": "number" + }, + "INR": { + "type": "number" + }, + "CHF": { + "type": "number" + }, + "MXN": { + "type": "number" + }, + "SGD": { + "type": "number" + }, + "CZK": { + "type": "number" + }, + "THB": { + "type": "number" + }, + "BGN": { + "type": "number" + }, + "EUR": { + "type": "number" + }, + "MYR": { + "type": "number" + }, + "NOK": { + "type": "number" + }, + "CNY": { + "type": "number" + }, + "HRK": { + "type": "number" + }, + "PLN": { + "type": "number" + }, + "LTL": { + "type": "number" + }, + "TRY": { + "type": "number" + }, + "ZAR": { + "type": "number" + }, + "CAD": { + "type": "number" + }, + "BRL": { + "type": "number" + }, + "RON": { + "type": "number" + }, + "DKK": { + "type": "number" + }, + "NZD": { + "type": "number" + }, + "EEK": { + "type": "number" + }, + "JPY": { + "type": "number" + }, + "RUB": { + "type": "number" + }, + "KRW": { + "type": "number" + }, + "USD": { + "type": "number" + }, + "AUD": { + "type": "number" + }, + "HUF": { + "type": "number" + }, + "SEK": { + "type": "number" + } + } + }, + "date": { + "type": "string" + } + } +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/source.py b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/source.py new file mode 100644 index 
00000000000..75ce0568dfb --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/source_python_http_tutorial/source.py @@ -0,0 +1,122 @@ +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
from datetime import datetime, timedelta
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple

import requests
from base_python import AbstractSource, HttpStream, Stream
from base_python.cdk.streams.auth.core import NoAuth

# Single source of truth for the date format used in the config, in the
# request path, in the records' cursor field and in the persisted state.
_DATE_FORMAT = "%Y-%m-%d"


class ExchangeRates(HttpStream):
    """Stream of daily exchange rates for one base currency.

    Each stream slice is one calendar day: the day is used as the request
    path and the base currency is sent as a query parameter. The ``date``
    field of every record is the incremental cursor.
    """

    url_base = "https://api.ratesapi.io/"
    cursor_field = "date"

    def __init__(self, base: str, start_date: datetime, **kwargs):
        """
        :param base: base currency sent to the API as the ``base`` query parameter
        :param start_date: first day to sync when no state is available
        :param kwargs: forwarded unchanged to ``HttpStream.__init__`` (e.g. ``authenticator``)
        """
        super().__init__(**kwargs)
        self.base = base
        self.start_date = start_date

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return None: the API does not paginate, so a response never has a next page."""
        return None

    def path(
        self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the request path, which is the day held by the current slice."""
        return stream_slice["date"]

    def request_params(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> MutableMapping[str, Any]:
        """Return the query parameters: the API requires the base currency on every request."""
        return {"base": self.base}

    def parse_response(
        self,
        response: requests.Response,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> Iterable[Mapping]:
        """Return the response body as the single record of this request.

        The response is a JSON object emitted as-is, so it is wrapped in a
        one-element list.
        """
        return [response.json()]

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the state after taking ``latest_record`` into account.

        The new cursor value is the most recent of the stored cursor and the
        record's cursor. ``current_stream_state`` may be None or empty on the
        first sync; in that case ``start_date`` is used as the stored cursor,
        so the first record is compared rather than discarded.

        :param current_stream_state: state accumulated so far, possibly None or empty
        :param latest_record: the record that was just read
        :return: ``{"date": "<YYYY-MM-DD>"}``
        """
        if current_stream_state and self.cursor_field in current_stream_state:
            newest = datetime.strptime(current_stream_state[self.cursor_field], _DATE_FORMAT)
        else:
            newest = self.start_date

        record_value = latest_record.get(self.cursor_field)
        if record_value is not None:
            # A record without a cursor value cannot move the state forward.
            newest = max(newest, datetime.strptime(record_value, _DATE_FORMAT))

        return {self.cursor_field: newest.strftime(_DATE_FORMAT)}

    def _chunk_date_range(self, start_date: datetime) -> List[Mapping[str, Any]]:
        """Return one ``{"date": "<YYYY-MM-DD>"}`` slice per day from ``start_date`` up to now.

        ``start_date`` itself is included; stepping is in whole days and stops
        at the first value that is not earlier than the current time.
        """
        now = datetime.now()  # evaluated once so every comparison uses the same bound
        dates = []
        day = start_date
        while day < now:
            formatted = day.strftime(_DATE_FORMAT)
            self.logger.info(formatted)
            dates.append({"date": formatted})
            day += timedelta(days=1)

        return dates

    def stream_slices(
        self, sync_mode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None
    ) -> Iterable[Optional[Mapping[str, Any]]]:
        """Return the daily slices to request.

        Slicing starts at the date stored in ``stream_state`` when there is
        one, otherwise at the configured ``start_date``. ``sync_mode`` and
        ``cursor_field`` are not consulted.
        """
        if stream_state and self.cursor_field in stream_state:
            first_day = datetime.strptime(stream_state[self.cursor_field], _DATE_FORMAT)
        else:
            first_day = self.start_date
        return self._chunk_date_range(first_day)


class SourcePythonHttpTutorial(AbstractSource):
    """Tutorial source exposing a single ``ExchangeRates`` stream."""

    def check_connection(self, logger, config) -> Tuple[bool, Any]:
        """Validate the configured base currency without calling the API.

        :param logger: logger object (unused)
        :param config: the user-input config
        :return: ``(True, None)`` when ``config["base"]`` is an accepted currency,
            otherwise ``(False, <error message>)``. A missing ``base`` key is
            reported as a failed check rather than raising ``KeyError``.
        """
        accepted_currencies = {
            "USD",
            "JPY",
            "BGN",
            "CZK",
            "DKK",
        }  # there are more currencies but let's assume these are the only allowed ones
        input_currency = config.get("base")
        if input_currency not in accepted_currencies:
            return False, f"Input currency {input_currency} is invalid. Please input one of the following currencies: {accepted_currencies}"
        return True, None

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """Build the list of streams for this source from the user config.

        :param config: the user-input config; ``start_date`` must be a
            ``YYYY-MM-DD`` string and ``base`` the base currency
        :return: a list holding the single ``ExchangeRates`` stream
        """
        # NoAuth just means there is no authentication required for this API. It's only included for completeness
        # of the example, but if you don't need authentication, you don't need to pass an authenticator at all.
        # Other authenticators are available for API token-based auth and Oauth2.
        auth = NoAuth()
        # Parse the date from a string into a datetime object
        start_date = datetime.strptime(config["start_date"], _DATE_FORMAT)
        return [ExchangeRates(authenticator=auth, base=config["base"], start_date=start_date)]
+ } + } + } +} diff --git a/airbyte-integrations/connectors/source-python-http-tutorial/unit_tests/unit_tests.py b/airbyte-integrations/connectors/source-python-http-tutorial/unit_tests/unit_tests.py new file mode 100644 index 00000000000..a3f12086dd5 --- /dev/null +++ b/airbyte-integrations/connectors/source-python-http-tutorial/unit_tests/unit_tests.py @@ -0,0 +1,52 @@ +# MIT License +# +# Copyright (c) 2020 Airbyte +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ + +""" +MIT License + +Copyright (c) 2020 Airbyte + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+""" + +# format anchor + + +def test_example_method(): + assert True diff --git a/airbyte-integrations/connectors/source-scaffold-source-http/setup.py b/airbyte-integrations/connectors/source-scaffold-source-http/setup.py index d8d2dfea7be..fcf9b0a3261 100644 --- a/airbyte-integrations/connectors/source-scaffold-source-http/setup.py +++ b/airbyte-integrations/connectors/source-scaffold-source-http/setup.py @@ -54,5 +54,5 @@ setup( author_email="contact@airbyte.io", packages=find_packages(), install_requires=["airbyte-protocol", "base-python", "pytest==6.1.2"], - package_data={"": ["*.json"]}, + package_data={"": ["*.json", "schemas/*.json", "schemas/shared/*.json"]}, ) diff --git a/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/source.py b/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/source.py index 76480851fbf..4666a429285 100644 --- a/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/source.py +++ b/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/source.py @@ -50,7 +50,7 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple import requests from base_python import AbstractSource, HttpStream, Stream -from base_python.cdk.streams.auth.core import TokenAuthenticator +from base_python.cdk.streams.auth.token import TokenAuthenticator """ TODO: Most comments in this class are instructive and should be deleted after the source is implemented. @@ -223,7 +223,7 @@ class SourceScaffoldSourceHttp(AbstractSource): See https://github.com/airbytehq/airbyte/blob/master/airbyte-integrations/connectors/source-stripe/source_stripe/source.py#L232 for an example. 
- :param config: the user-input config object conforming the connector's spec.json + :param config: the user-input config object conforming to the connector's spec.json :param logger: logger object :return Tuple[bool, any]: (True, None) if the input config can be used to connect to the API successfully, (False, error) otherwise. """ diff --git a/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/spec.json b/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/spec.json index 8ac78c03afa..63ad05756e5 100644 --- a/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/spec.json +++ b/airbyte-integrations/connectors/source-scaffold-source-http/source_scaffold_source_http/spec.json @@ -1,15 +1,13 @@ -// TODO: This schema defines the configuration required for the source. This usually involves metadata such as database and/or authentication information. -// Delete this comment after reading. { "documentationUrl": "https://docsurl.com", "connectionSpecification": { "$schema": "http://json-schema.org/draft-07/schema#", "title": "Scaffold Source Http Spec", "type": "object", - "required": ["fix-me"], + "required": ["TODO"], "additionalProperties": false, "properties": { - "fix-me": { + "TODO: This schema defines the configuration required for the source. This usually involves metadata such as database and/or authentication information.": { "type": "string", "description": "describe me" } diff --git a/docs/tutorials/slack-history/index.html b/docs/tutorials/slack-history/index.html index d3facd3a79c..0812368137c 100644 --- a/docs/tutorials/slack-history/index.html +++ b/docs/tutorials/slack-history/index.html @@ -10,6 +10,7 @@
+