From 2aa7327a8ecd596c5babca3c3aaa14ca6aaa7c33 Mon Sep 17 00:00:00 2001 From: Joe Reuter Date: Thu, 1 Feb 2024 11:43:54 +0100 Subject: [PATCH] airbyte-lib: Hidden documentation (#34702) Co-authored-by: Aaron ("AJ") Steers --- .../airbyte-lib-high-level-architecture.svg | 1 + docs/contributing-to-airbyte/writing-docs.md | 20 ++++++ .../airbyte-lib/getting-started.mdx | 61 ++++++++++++++++ docs/using-airbyte/airbyte-lib/reference.mdx | 15 ++++ docusaurus/docusaurus.config.js | 8 ++- docusaurus/package.json | 1 + docusaurus/pnpm-lock.yaml | 14 ++++ .../src/components/AirbyteLibConnectors.jsx | 22 ++++++ .../src/components/AirbyteLibDefinitions.jsx | 17 +++++ .../src/components/AirbyteLibExample.jsx | 30 ++++++-- docusaurus/src/components/SpecSchema.jsx | 18 ++--- docusaurus/src/connector_registry.js | 7 +- docusaurus/src/remark/connectorList.js | 24 +++++++ docusaurus/src/remark/docsHeaderDecoration.js | 34 ++------- docusaurus/src/remark/specDecoration.js | 69 +++++++++++++++---- docusaurus/src/remark/utils.js | 34 +++++++++ 16 files changed, 313 insertions(+), 62 deletions(-) create mode 100644 docs/assets/docs/airbyte-lib-high-level-architecture.svg create mode 100644 docs/using-airbyte/airbyte-lib/getting-started.mdx create mode 100644 docs/using-airbyte/airbyte-lib/reference.mdx create mode 100644 docusaurus/src/components/AirbyteLibConnectors.jsx create mode 100644 docusaurus/src/components/AirbyteLibDefinitions.jsx create mode 100644 docusaurus/src/remark/connectorList.js create mode 100644 docusaurus/src/remark/utils.js diff --git a/docs/assets/docs/airbyte-lib-high-level-architecture.svg b/docs/assets/docs/airbyte-lib-high-level-architecture.svg new file mode 100644 index 00000000000..70f0f46bc45 --- /dev/null +++ b/docs/assets/docs/airbyte-lib-high-level-architecture.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/contributing-to-airbyte/writing-docs.md b/docs/contributing-to-airbyte/writing-docs.md index 3f2f2092654..75e6efd9a4e 100644 --- 
a/docs/contributing-to-airbyte/writing-docs.md +++ b/docs/contributing-to-airbyte/writing-docs.md @@ -324,6 +324,26 @@ Back to ordinary markdown content. ``` Eagle-eyed readers may note that _all_ markdown should support this feature since it's part of the html spec. However, it's worth special mention since these dropdowns have been styled to be a graceful visual fit within our rendered documentation in all environments. +#### Documenting airbyte-lib usage + +airbyte-lib is a Python library that allows running syncs within a Python script for a subset of connectors. Documentation around airbyte-lib connectors is automatically generated from the connector's JSON schema spec. +There are a few approaches to combine full control over the documentation with automatic generation for common cases: +* If a connector is airbyte-lib enabled (`remoteRegistries.pypi.enabled` set in the `metadata.yaml` file of the connector) and there is no second-level heading `Usage with airbyte-lib` in the documentation, the documentation will be automatically generated and placed above the `Changelog` section. +* By manually specifying a `Usage with airbyte-lib` section, this automatic generation is disabled. The following is a good starting point for this section: +```md + + +## Usage with airbyte-lib + + + + + + +``` + +The `AirbyteLibExample` component will generate a code example that can be run with airbyte-lib, including an auto-generated sample configuration based on the configuration schema. The `SpecSchema` component will generate a reference table with the connector's JSON schema spec, like a non-interactive version of the connector form in the UI. It can be used on any docs page. 
+ ## Additional guidelines - If you're updating a connector doc, follow the [Connector documentation template](https://hackmd.io/Bz75cgATSbm7DjrAqgl4rw) diff --git a/docs/using-airbyte/airbyte-lib/getting-started.mdx b/docs/using-airbyte/airbyte-lib/getting-started.mdx new file mode 100644 index 00000000000..6c568fd2812 --- /dev/null +++ b/docs/using-airbyte/airbyte-lib/getting-started.mdx @@ -0,0 +1,61 @@ +import AirbyteLibConnectors from '@site/src/components/AirbyteLibConnectors'; + +# Getting Started with AirbyteLib (Beta) + +AirbyteLib is a library that provides a set of utilities to use Airbyte connectors in Python. It is meant to be used in situations where setting up an Airbyte server or cloud account is not possible or desirable, for example in a Jupyter notebook or when iterating on early prototypes on a developer's workstation. + +## Installation + +```bash +pip install airbyte-lib +``` + +Or during the beta, you may want to install the latest from source with: + +```bash +pip install 'git+https://github.com/airbytehq/airbyte.git@master#egg=airbyte-lib&subdirectory=airbyte-lib' +``` + +## Usage + +Data can be extracted from sources and loaded into caches: + +```python +import airbyte_lib as ab + +source = ab.get_connector( + "source-spacex-api", + config={"id": "605b4b6aaa5433645e37d03f"}, + install_if_missing=True, +) +source.check() + +source.set_streams(["launches", "rockets", "capsules"]) + +cache = ab.new_local_cache() +result = source.read(cache) + +for name, records in result.cache.streams.items(): + print(f"Stream {name}: {len(records)} records") +``` + +## API Reference + +For details on specific classes and methods, please refer to our [AirbyteLib API Reference](./reference). 
+ +## Architecture + +[comment]: <> (Edit under https://docs.google.com/drawings/d/1M7ti2D4ha6cEtPnk04RLp1SSh3au4dRJsLupnGPigHQ/edit?usp=sharing) + +![Architecture](../../assets/docs/airbyte-lib-high-level-architecture.svg) + +airbyte-lib is a Python library that can be run in any context that supports Python >=3.9. It contains the following main components: +* **Source**: A source object uses a Python connector and includes a configuration object. The configuration object is a dictionary that contains the configuration of the connector, like authentication or connection modalities. The source object is used to read data from the connector. +* **Cache**: Data can be read directly from the source object. However, it is recommended to use a cache object to store the data. The cache object allows temporarily storing records from the source in a SQL database like a local DuckDB file or a Postgres or Snowflake instance. +* **Result**: An object holding the records from a read operation on a source. It allows quick access to the records of each synced stream via the used cache object. Data can be accessed as a list of records, a Pandas DataFrame or via SQLAlchemy queries. + +## Available connectors + +The following connectors are available: + + diff --git a/docs/using-airbyte/airbyte-lib/reference.mdx b/docs/using-airbyte/airbyte-lib/reference.mdx new file mode 100644 index 00000000000..07d0dd7fdf3 --- /dev/null +++ b/docs/using-airbyte/airbyte-lib/reference.mdx @@ -0,0 +1,15 @@ +import AirbyteLibDefinitions from '@site/src/components/AirbyteLibDefinitions'; + +# airbyte-lib reference + +This page contains the reference documentation for the airbyte-lib library. 
+ +## Main `airbyte_lib` module + + + +## Caches `airbyte_lib.caches` + +The following cache implementations are available + + \ No newline at end of file diff --git a/docusaurus/docusaurus.config.js b/docusaurus/docusaurus.config.js index f655750e599..06c4311a1d9 100644 --- a/docusaurus/docusaurus.config.js +++ b/docusaurus/docusaurus.config.js @@ -11,6 +11,7 @@ const darkCodeTheme = themes.dracula; const docsHeaderDecoration = require("./src/remark/docsHeaderDecoration"); const productInformation = require("./src/remark/productInformation"); +const connectorList = require("./src/remark/connectorList"); const specDecoration = require("./src/remark/specDecoration"); const redirects = yaml.load( @@ -66,6 +67,10 @@ const config = { test: /\.ya?ml$/, use: "yaml-loader", }, + { + test: /\.html$/i, + loader: "html-loader", + }, ], }, }; @@ -90,7 +95,8 @@ const config = { editUrl: "https://github.com/airbytehq/airbyte/blob/master/docs", path: "../docs", exclude: ["**/*.inapp.md"], - remarkPlugins: [docsHeaderDecoration, productInformation, specDecoration], + beforeDefaultRemarkPlugins: [specDecoration, connectorList], // use before-default plugins so TOC rendering picks up inserted headings + remarkPlugins: [docsHeaderDecoration, productInformation], }, blog: false, theme: { diff --git a/docusaurus/package.json b/docusaurus/package.json index 2b76eecf0b6..c2476ce304c 100644 --- a/docusaurus/package.json +++ b/docusaurus/package.json @@ -105,6 +105,7 @@ "del": "6.1.1", "docusaurus-plugin-hubspot": "^1.0.0", "docusaurus-plugin-segment": "^1.0.3", + "html-loader": "^4.2.0", "js-yaml": "^4.1.0", "json-schema-faker": "^0.5.4", "node-fetch": "^3.3.2", diff --git a/docusaurus/pnpm-lock.yaml b/docusaurus/pnpm-lock.yaml index 39706c103c0..ec5d26397ea 100644 --- a/docusaurus/pnpm-lock.yaml +++ b/docusaurus/pnpm-lock.yaml @@ -278,6 +278,9 @@ dependencies: docusaurus-plugin-segment: specifier: ^1.0.3 version: 1.0.3 + html-loader: + specifier: ^4.2.0 + version: 4.2.0(webpack@5.89.0) 
js-yaml: specifier: ^4.1.0 version: 4.1.0 @@ -6549,6 +6552,17 @@ packages: resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} dev: false + /html-loader@4.2.0(webpack@5.89.0): + resolution: {integrity: sha512-OxCHD3yt+qwqng2vvcaPApCEvbx+nXWu+v69TYHx1FO8bffHn/JjHtE3TTQZmHjwvnJe4xxzuecetDVBrQR1Zg==} + engines: {node: '>= 14.15.0'} + peerDependencies: + webpack: ^5.0.0 + dependencies: + html-minifier-terser: 7.2.0 + parse5: 7.1.2 + webpack: 5.89.0 + dev: false + /html-minifier-terser@6.1.0: resolution: {integrity: sha512-YXxSlJBZTP7RS3tWnQw74ooKa6L9b9i9QYXY21eUEvhZ3u9XLfv6OnFsQq6RxkhHygsaUMvYsZRV5rU/OVNZxw==} engines: {node: '>=12'} diff --git a/docusaurus/src/components/AirbyteLibConnectors.jsx b/docusaurus/src/components/AirbyteLibConnectors.jsx new file mode 100644 index 00000000000..2a546bda679 --- /dev/null +++ b/docusaurus/src/components/AirbyteLibConnectors.jsx @@ -0,0 +1,22 @@ +export default function AirbyteLibConnectors({ + connectorsJSON, + }) { + const connectors = JSON.parse(connectorsJSON); + return +} + +function getRelativeDocumentationUrl(connector) { + // get the relative path from the the dockerRepository_oss (e.g airbyte/source-amazon-sqs -> /integrations/sources/amazon-sqs) + + const fullDockerImage = connector.dockerRepository_oss; + console.log(fullDockerImage); + const dockerImage = fullDockerImage.split("airbyte/")[1]; + + const [integrationType, ...integrationName] = dockerImage.split("-"); + + return `/integrations/${integrationType}s/${integrationName.join("-")}`; +} \ No newline at end of file diff --git a/docusaurus/src/components/AirbyteLibDefinitions.jsx b/docusaurus/src/components/AirbyteLibDefinitions.jsx new file mode 100644 index 00000000000..e08462f8bfa --- /dev/null +++ b/docusaurus/src/components/AirbyteLibDefinitions.jsx @@ -0,0 +1,17 @@ +import React from 'react'; + +// Add additional modules here +import main_docs from 
"../../../airbyte-lib/docs/generated/airbyte_lib.html"; +import caches_docs from "../../../airbyte-lib/docs/generated/airbyte_lib/caches.html"; + +const docs = { + "airbyte_lib": main_docs, + "airbyte_lib.caches": caches_docs, +} + + +export default function AirbyteLibDefinitions({ module }) { + return <> +
+ +} diff --git a/docusaurus/src/components/AirbyteLibExample.jsx b/docusaurus/src/components/AirbyteLibExample.jsx index aeb78fbf081..cc9a17638e6 100644 --- a/docusaurus/src/components/AirbyteLibExample.jsx +++ b/docusaurus/src/components/AirbyteLibExample.jsx @@ -1,14 +1,32 @@ -import React from "react"; +import React, { useMemo } from "react"; import { JSONSchemaFaker } from "json-schema-faker"; import CodeBlock from '@theme/CodeBlock'; +/** + * Generate a fake config based on the spec. + * + * As our specs are not 100% consistent, errors may occur. + * Try to generate a few times before giving up. + */ +function generateFakeConfig(spec) { + let tries = 5; + while (tries > 0) { + try { + return JSON.stringify(JSONSchemaFaker.generate(spec), null, 2) + } + catch (e) { + tries--; + } + } + return "{ ... }"; +} export const AirbyteLibExample = ({ specJSON, - connector + connector, }) => { - const spec = JSON.parse(specJSON); - const fakeConfig = JSONSchemaFaker.generate(spec); + const spec = useMemo(() => JSON.parse(specJSON), [specJSON]); + const fakeConfig = useMemo(() => generateFakeConfig(spec), [spec]); return <>

Install the Python library via: @@ -20,12 +38,12 @@ export const AirbyteLibExample = ({ language="python" >{`import airbyte_lib as ab -config = ${JSON.stringify(fakeConfig, null, 2)} +config = ${fakeConfig} result = ab.get_connector( "${connector}", config=config, -).read_all() +).read() for record in result.cache.streams["my_stream:name"]: print(record)`} diff --git a/docusaurus/src/components/SpecSchema.jsx b/docusaurus/src/components/SpecSchema.jsx index 23ac1cc5104..a0cbcbe8298 100644 --- a/docusaurus/src/components/SpecSchema.jsx +++ b/docusaurus/src/components/SpecSchema.jsx @@ -27,7 +27,7 @@ function JSONSchemaViewer(props) { Type

- Title + Property name
@@ -108,16 +108,16 @@ function getType(schema) { function JSONSchemaProperty({ propertyKey, schema, required, depth = 0 }) { const newDepth = depth + 1; - const propertyName = <> -
{propertyKey || schema.title}
+ const fieldName = <> +
{schema.title || propertyKey}
{required &&
required
} ; - const typeAndTitle = <> + const typeAndPropertyName = <>
{getType(schema)}
- {schema.title &&
{schema.title}
} + {propertyKey &&
{propertyKey}
}
; if (showCollapsible(schema)) { @@ -126,9 +126,9 @@ function JSONSchemaProperty({ propertyKey, schema, required, depth = 0 }) { <>
- {propertyName} + {fieldName}
- {typeAndTitle} + {typeAndPropertyName} {showDescription(schema) && } {schema.type === "object" && schema.oneOf && } @@ -140,9 +140,9 @@ function JSONSchemaProperty({ propertyKey, schema, required, depth = 0 }) { } else { return <>
- {propertyName} + {fieldName}
- {typeAndTitle} + {typeAndPropertyName} } } diff --git a/docusaurus/src/connector_registry.js b/docusaurus/src/connector_registry.js index f0468706f60..013650dc8a4 100644 --- a/docusaurus/src/connector_registry.js +++ b/docusaurus/src/connector_registry.js @@ -8,4 +8,9 @@ const fetchCatalog = async () => { return json; }; -module.exports = fetchCatalog(); \ No newline at end of file +module.exports = { + catalog: fetchCatalog(), + isPypiConnector: (connector) => { + return Boolean(connector.remoteRegistries_oss?.pypi?.enabled); + } +} diff --git a/docusaurus/src/remark/connectorList.js b/docusaurus/src/remark/connectorList.js new file mode 100644 index 00000000000..c6fffd88d86 --- /dev/null +++ b/docusaurus/src/remark/connectorList.js @@ -0,0 +1,24 @@ +const visit = require("unist-util-visit").visit; +const { catalog, isPypiConnector } = require("../connector_registry"); + +const plugin = () => { + const transformer = async (ast, vfile) => { + + const registry = await catalog; + + visit(ast, "mdxJsxFlowElement", (node) => { + if (node.name !== "AirbyteLibConnectors") return; + + const connectors = registry.filter(isPypiConnector); + + node.attributes.push({ + type: "mdxJsxAttribute", + name: "connectorsJSON", + value: JSON.stringify(connectors) + }); + }); + }; + return transformer; +}; + +module.exports = plugin; diff --git a/docusaurus/src/remark/docsHeaderDecoration.js b/docusaurus/src/remark/docsHeaderDecoration.js index 46281999d85..8f8fcb96c8c 100644 --- a/docusaurus/src/remark/docsHeaderDecoration.js +++ b/docusaurus/src/remark/docsHeaderDecoration.js @@ -1,5 +1,6 @@ const visit = require("unist-util-visit").visit; -const catalog = require("../connector_registry"); +const { isPypiConnector } = require("../connector_registry"); +const { isDocsPage, getRegistryEntry } = require("./utils"); const toAttributes = (props) => Object.entries(props).map(([key, value]) => ({ @@ -12,19 +13,7 @@ const plugin = () => { const transformer = async (ast, vfile) => { if 
(!isDocsPage(vfile)) return; - const pathParts = vfile.path.split("/"); - const connectorName = pathParts.pop().split(".")[0]; - const connectorType = pathParts.pop(); - const dockerRepository = `airbyte/${connectorType.replace( - /s$/, - "" - )}-${connectorName}`; - - const registry = await catalog; - - const registryEntry = registry.find( - (r) => r.dockerRepository_oss === dockerRepository - ); + const registryEntry = await getRegistryEntry(vfile); if (!registryEntry) return; @@ -42,7 +31,7 @@ const plugin = () => { node.attributes = toAttributes({ isOss: registryEntry.is_oss, isCloud: registryEntry.is_cloud, - isPypiPublished: Boolean(registryEntry.remoteRegistries?.pypi?.enabled), + isPypiPublished: false, supportLevel: registryEntry.supportLevel_oss, dockerImageTag: registryEntry.dockerImageTag_oss, iconUrl: registryEntry.iconUrl_oss, @@ -57,19 +46,4 @@ const plugin = () => { return transformer; }; -const isDocsPage = (vfile) => { - if ( - !vfile.path.includes("integrations/sources") && - !vfile.path.includes("integrations/destinations") - ) { - return false; - } - - if (vfile.path.includes("-migrations.md")) { - return false; - } - - return true; -}; - module.exports = plugin; diff --git a/docusaurus/src/remark/specDecoration.js b/docusaurus/src/remark/specDecoration.js index ef205bb4c76..5df68b66193 100644 --- a/docusaurus/src/remark/specDecoration.js +++ b/docusaurus/src/remark/specDecoration.js @@ -1,24 +1,63 @@ const visit = require("unist-util-visit").visit; -const catalog = require("../connector_registry"); +const { catalog, isPypiConnector } = require("../connector_registry"); +const { isDocsPage, getRegistryEntry } = require("./utils"); const plugin = () => { const transformer = async (ast, vfile) => { - - const registry = await catalog; - - visit(ast, "mdxJsxFlowElement", (node) => { - if (node.name !== "SpecSchema" && node.name !== "AirbyteLibExample") return; - - const connectorName = node.attributes.find((attr) => attr.name === 
"connector").value; - const connectorSpec = registry.find( (c) => c.dockerRepository_oss === `airbyte/${connectorName}`).spec_oss.connectionSpecification; - node.attributes.push({ - type: "mdxJsxAttribute", - name: "specJSON", - value: JSON.stringify(connectorSpec) - }); - }); + await injectDefaultAirbyteLibSection(vfile, ast); + await injectSpecSchema(ast); }; return transformer; }; +async function injectSpecSchema(ast) { + const registry = await catalog; + visit(ast, "mdxJsxFlowElement", (node) => { + if (node.name !== "SpecSchema" && node.name !== "AirbyteLibExample") return; + + const connectorName = node.attributes.find((attr) => attr.name === "connector").value; + const connectorSpec = registry.find((c) => c.dockerRepository_oss === `airbyte/${connectorName}`).spec_oss.connectionSpecification; + node.attributes.push({ + type: "mdxJsxAttribute", + name: "specJSON", + value: JSON.stringify(connectorSpec) + }); + }); +} + +async function injectDefaultAirbyteLibSection(vfile, ast) { + const registryEntry = await getRegistryEntry(vfile); + if (!isDocsPage(vfile) || !registryEntry || !isPypiConnector(registryEntry) || vfile.value.includes("## Usage with airbyte-lib")) { + return; + } + const connectorName = registryEntry.dockerRepository_oss.split("/").pop(); + + let added = false; + visit(ast, "heading", (node, index, parent) => { + if (!added && isChangelogHeading(node)) { + added = true; + parent.children.splice(index, 0, { + type: "heading", + depth: 2, + children: [{ type: "text", value: "Reference" }] + }, { + type: "mdxJsxFlowElement", + name: "SpecSchema", + attributes: [ + { + type: "mdxJsxAttribute", + name: "connector", + value: connectorName + }, + ] + }); + } + }); +} + +function isChangelogHeading(node) { + return node.depth === 2 && node.children.length === 1 && node.children[0].value === "Changelog"; +} + + module.exports = plugin; diff --git a/docusaurus/src/remark/utils.js b/docusaurus/src/remark/utils.js new file mode 100644 index 
00000000000..f946396ce7f --- /dev/null +++ b/docusaurus/src/remark/utils.js @@ -0,0 +1,34 @@ +const { catalog } = require("../connector_registry"); + +const isDocsPage = (vfile) => { + if ( + !vfile.path.includes("integrations/sources") && + !vfile.path.includes("integrations/destinations") + ) { + return false; + } + + if (vfile.path.includes("-migrations.md")) { + return false; + } + + return true; +}; + +const getRegistryEntry = async (vfile) => { + const pathParts = vfile.path.split("/"); + const connectorName = pathParts.pop().split(".")[0]; + const connectorType = pathParts.pop(); + const dockerRepository = `airbyte/${connectorType.replace( + /s$/, + "" + )}-${connectorName}`; + + const registry = await catalog; + + return registry.find( + (r) => r.dockerRepository_oss === dockerRepository + ); +} + +module.exports = { isDocsPage, getRegistryEntry };