
Merge branch 'master' into devin/1765229017-fix-klaviyo-profile-subscriptions

Aldo Gonzalez authored; committed via GitHub on 2025-12-18 11:20:15 -06:00
640 changed files with 89902 additions and 5423 deletions

View File

@@ -139,8 +139,8 @@ runs:
CONNECTOR_VERSION_TAG="${{ inputs.tag-override }}"
echo "🏷 Using provided tag override: $CONNECTOR_VERSION_TAG"
elif [[ "${{ inputs.release-type }}" == "pre-release" ]]; then
hash=$(git rev-parse --short=10 HEAD)
CONNECTOR_VERSION_TAG="${CONNECTOR_VERSION}-dev.${hash}"
hash=$(git rev-parse --short=7 HEAD)
CONNECTOR_VERSION_TAG="${CONNECTOR_VERSION}-preview.${hash}"
echo "🏷 Using pre-release tag: $CONNECTOR_VERSION_TAG"
else
CONNECTOR_VERSION_TAG="$CONNECTOR_VERSION"

View File

@@ -21,7 +21,7 @@ As needed or by request, Airbyte Maintainers can execute the following slash com
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-preview.{git-sha}`) for all modified connectors in the PR.
If you have any questions, feel free to ask in the PR comments or join our [Slack community](https://airbytehq.slack.com/).

View File

@@ -21,11 +21,18 @@ Airbyte Maintainers (that's you!) can execute the following slash commands on yo
- `/bump-version` - Bumps connector versions.
- You can specify a custom changelog by passing `changelog`. Example: `/bump-version changelog="My cool update"`
- Leaving the changelog arg blank will auto-populate the changelog from the PR title.
- `/bump-progressive-rollout-version` - Bumps connector version with an RC suffix for progressive rollouts.
- Creates a release candidate version (e.g., `2.16.10-rc.1`) with `enableProgressiveRollout: true`
- Example: `/bump-progressive-rollout-version changelog="Add new feature for progressive rollout"`
- `/run-cat-tests` - Runs legacy CAT tests (Connector Acceptance Tests)
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-preview.{git-sha}`) for all modified connectors in the PR.
- Connector release lifecycle (AI-powered):
- `/ai-prove-fix` - Runs prerelease readiness checks, including testing against customer connections.
- `/ai-canary-prerelease` - Rolls out prerelease to 5-10 connections for canary testing.
- `/ai-release-watch` - Monitors rollout post-release and tracks sync success rates.
- JVM connectors:
- `/update-connector-cdk-version connector=<CONNECTOR_NAME>` - Updates the specified connector to the latest CDK version.
Example: `/update-connector-cdk-version connector=destination-bigquery`

View File

@@ -0,0 +1,72 @@
name: AI Canary Prerelease Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Canary Prerelease for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-canary-prerelease:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ steps.get-app-token.outputs.token }}
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Canary Prerelease Started**
>
> Rolling out to 5-10 connections, watching results, and reporting findings.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Canary Prerelease
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!canary_prerelease"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "🐤 **AI Canary Prerelease session starting...** Rolling out to 5-10 connections, watching results, and reporting findings. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/canary_prerelease.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,72 @@
name: AI Prove Fix Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Prove Fix for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-prove-fix:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ steps.get-app-token.outputs.token }}
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Prove Fix Started**
>
> Running readiness checks and testing against customer connections.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Prove Fix
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!prove_fix"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "🔍 **AI Prove Fix session starting...** Running readiness checks and testing against customer connections. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/prove_fix.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,72 @@
name: AI Release Watch Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Release Watch for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-release-watch:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ steps.get-app-token.outputs.token }}
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Release Watch Started**
>
> Monitoring rollout and tracking sync success rates.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Release Watch
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!release_watch"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "👁️ **AI Release Watch session starting...** Monitoring rollout and tracking sync success rates. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/release_watch.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,178 @@
name: Bump connector version for progressive rollout
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number. This PR will be referenced in the changelog line."
type: number
required: false
comment-id:
description: "Optional. The comment-id of the slash command. Used to update the comment with the status."
required: false
type:
description: "The type of bump to perform. One of 'major', 'minor', or 'patch'."
required: false
default: "patch"
changelog:
description: "Optional. The comment to add to the changelog. If not provided, the PR title will be used."
required: false
default: ""
# These must be declared, but they are unused and ignored.
# TODO: Infer 'repo' and 'gitref' from PR number on other workflows, so we can remove these.
repo:
description: "Repo (Ignored)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Ref (Ignored)"
required: false
run-name: "Bump connector version for progressive rollout in PR: #${{ github.event.inputs.pr }}"
concurrency:
group: ${{ github.workflow }}-${{ github.event.inputs.pr }}
# Cancel any previous runs on the same branch if they are still in progress
cancel-in-progress: true
jobs:
bump-progressive-rollout-version:
name: "Bump version of connectors for progressive rollout in this PR"
runs-on: ubuntu-24.04
steps:
- name: Get job variables
id: job-vars
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: bash
run: |
PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT
echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT
echo "pr_title=$(echo "$PR_JSON" | jq -r .title)" >> $GITHUB_OUTPUT
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
# permissions to add commits to our main repo as well as forks. This will only work on
# forks if the user installs the app into their fork. Until we document this as a clear
# path, we will have to keep using the PAT.
- name: Checkout Airbyte
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
repository: ${{ steps.job-vars.outputs.repo }}
ref: ${{ steps.job-vars.outputs.branch }}
fetch-depth: 1
# Important that token is a PAT so that CI checks are triggered again.
# Without this we would be forever waiting on required checks to pass.
token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
- name: Append comment with job run link
# If comment-id is not provided, this will create a new
# comment with the job run link.
id: first-comment-action
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
comment-id: ${{ github.event.inputs.comment-id }}
issue-number: ${{ github.event.inputs.pr }}
body: |
> **Progressive Rollout Version Bump Started**
>
> This will bump the connector version with an RC suffix and enable progressive rollout.
> [Check job output.][1]
[1]: ${{ steps.job-vars.outputs.run-url }}
- name: Log changelog source
run: |
if [ -n "${{ github.event.inputs.changelog }}" ]; then
echo "Using user-provided changelog: ${{ github.event.inputs.changelog }}"
else
echo "Using PR title as changelog: ${{ steps.job-vars.outputs.pr_title }}"
fi
- name: Run airbyte-ci connectors --modified bump-version with --rc flag
uses: ./.github/actions/run-airbyte-ci
continue-on-error: true
with:
context: "manual"
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
git_repo_url: https://github.com/${{ steps.job-vars.outputs.repo }}.git
subcommand: |
connectors --modified bump-version \
${{ github.event.inputs.type }} \
"${{ github.event.inputs.changelog != '' && github.event.inputs.changelog || steps.job-vars.outputs.pr_title }}" \
--pr-number ${{ github.event.inputs.pr }} \
--rc
# This is helpful in the case that we change a previously committed generated file to be ignored by git.
- name: Remove any files that have been gitignored
run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached
# Check for changes in git
- name: Check for changes
id: git-diff
run: |
git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT
shell: bash
# Commit changes (if any)
- name: Commit changes
id: commit-step
if: steps.git-diff.outputs.changes == 'true'
run: |
git config --global user.name "Octavia Squidington III"
git config --global user.email "octavia-squidington-iii@users.noreply.github.com"
git add .
git commit -m "chore: bump-version for progressive rollout"
echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
- name: Push changes to '(${{ steps.job-vars.outputs.repo }})'
if: steps.git-diff.outputs.changes == 'true'
run: |
git remote add contributor https://github.com/${{ steps.job-vars.outputs.repo }}.git
git push contributor HEAD:'${{ steps.job-vars.outputs.branch }}'
- name: Append success comment
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: steps.git-diff.outputs.changes == 'true'
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: hooray
body: |
> **Progressive Rollout Version Bump: SUCCESS**
>
> The connector version has been bumped with an RC suffix (e.g., `X.Y.Z-rc.1`).
> Changes applied successfully. (${{ steps.commit-step.outputs.sha }})
>
> **Next steps:**
> 1. Merge this PR to publish the RC version
> 2. Monitor the progressive rollout in production
> 3. When ready to promote, use the `finalize_rollout` workflow with `action=promote`
> 4. If issues arise, use `action=rollback` instead
- name: Append success comment (no-op)
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: steps.git-diff.outputs.changes != 'true'
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: "-1"
body: |
> Job completed successfully (no changes detected).
>
> This might happen if:
> - The connector already has an RC version
> - No modified connectors were detected in this PR
- name: Append failure comment
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: failure()
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: confused
body: |
> Job failed. Check the [workflow logs](${{ steps.job-vars.outputs.run-url }}) for details.

View File

@@ -0,0 +1,28 @@
name: Label Community PRs
# This workflow automatically adds the "community" label to PRs from forks.
# This enables automatic tracking on the Community PRs project board.
on:
pull_request_target:
types:
- opened
- reopened
jobs:
label-community-pr:
name: Add "Community" Label to PR
# Only run for PRs from forks
if: github.event.pull_request.head.repo.fork == true
runs-on: ubuntu-24.04
permissions:
issues: write
pull-requests: write
steps:
- name: Add community label
# This action uses GitHub's addLabels API, which is idempotent.
# If the label already exists, the API call succeeds without error.
uses: actions-ecosystem/action-add-labels@bd52874380e3909a1ac983768df6976535ece7f8 # v1.1.3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
labels: community

View File

@@ -3,7 +3,7 @@ name: Publish Connectors Pre-release
# It can be triggered via the /publish-connectors-prerelease slash command from PR comments,
# or via the MCP tool `publish_connector_to_airbyte_registry`.
#
# Pre-release versions are tagged with the format: {version}-dev.{10-char-git-sha}
# Pre-release versions are tagged with the format: {version}-preview.{7-char-git-sha}
# These versions are NOT eligible for semver auto-advancement but ARE available
# for version pinning via the scoped_configuration API.
#
@@ -66,7 +66,7 @@ jobs:
- name: Get short SHA
id: get-sha
run: |
SHORT_SHA=$(git rev-parse --short=10 HEAD)
SHORT_SHA=$(git rev-parse --short=7 HEAD)
echo "short-sha=$SHORT_SHA" >> $GITHUB_OUTPUT
- name: Get job variables
@@ -135,7 +135,7 @@ jobs:
> Publishing pre-release build for connector `${{ steps.resolve-connector.outputs.connector-name }}`.
> Branch: `${{ inputs.gitref }}`
>
> Pre-release versions will be tagged as `{version}-dev.${{ steps.get-sha.outputs.short-sha }}`
> Pre-release versions will be tagged as `{version}-preview.${{ steps.get-sha.outputs.short-sha }}`
> and are available for version pinning via the scoped_configuration API.
>
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
@@ -147,6 +147,7 @@ jobs:
with:
connectors: ${{ format('--name={0}', needs.init.outputs.connector-name) }}
release-type: pre-release
gitref: ${{ inputs.gitref }}
secrets: inherit
post-completion:
@@ -176,13 +177,12 @@ jobs:
id: message-vars
run: |
CONNECTOR_NAME="${{ needs.init.outputs.connector-name }}"
SHORT_SHA="${{ needs.init.outputs.short-sha }}"
VERSION="${{ needs.init.outputs.connector-version }}"
# Use the actual docker-image-tag from the publish workflow output
DOCKER_TAG="${{ needs.publish.outputs.docker-image-tag }}"
if [[ -n "$VERSION" ]]; then
DOCKER_TAG="${VERSION}-dev.${SHORT_SHA}"
else
DOCKER_TAG="{version}-dev.${SHORT_SHA}"
if [[ -z "$DOCKER_TAG" ]]; then
echo "::error::docker-image-tag output is missing from publish workflow. This is unexpected."
exit 1
fi
echo "connector_name=$CONNECTOR_NAME" >> $GITHUB_OUTPUT

View File

@@ -21,6 +21,14 @@ on:
required: false
default: false
type: boolean
gitref:
description: "Git ref (branch or SHA) to build connectors from. Used by pre-release workflow to build from PR branches."
required: false
type: string
outputs:
docker-image-tag:
description: "Docker image tag used when publishing. For single-connector callers only; multi-connector callers should not rely on this output."
value: ${{ jobs.publish_connector_registry_entries.outputs.docker-image-tag }}
workflow_dispatch:
inputs:
connectors:
@@ -48,6 +56,7 @@ jobs:
# v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
with:
ref: ${{ inputs.gitref || '' }}
fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
- name: List connectors to publish [manual]
@@ -105,6 +114,7 @@ jobs:
# v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
with:
ref: ${{ inputs.gitref || '' }}
fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
@@ -250,11 +260,14 @@ jobs:
max-parallel: 5
# Allow all jobs to run, even if one fails
fail-fast: false
outputs:
docker-image-tag: ${{ steps.connector-metadata.outputs.docker-image-tag }}
steps:
- name: Checkout Airbyte
# v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
with:
ref: ${{ inputs.gitref || '' }}
fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
@@ -292,8 +305,8 @@ jobs:
echo "connector-version=$(poe -qq get-version)" | tee -a $GITHUB_OUTPUT
CONNECTOR_VERSION=$(poe -qq get-version)
if [[ "${{ inputs.release-type }}" == "pre-release" ]]; then
hash=$(git rev-parse --short=10 HEAD)
echo "docker-image-tag=${CONNECTOR_VERSION}-dev.${hash}" | tee -a $GITHUB_OUTPUT
hash=$(git rev-parse --short=7 HEAD)
echo "docker-image-tag=${CONNECTOR_VERSION}-preview.${hash}" | tee -a $GITHUB_OUTPUT
echo "release-type-flag=--pre-release" | tee -a $GITHUB_OUTPUT
else
echo "docker-image-tag=${CONNECTOR_VERSION}" | tee -a $GITHUB_OUTPUT
@@ -349,6 +362,7 @@ jobs:
# v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
with:
ref: ${{ inputs.gitref || '' }}
submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
- name: Match GitHub User to Slack User
id: match-github-to-slack-user
@@ -381,6 +395,7 @@ jobs:
# v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
with:
ref: ${{ inputs.gitref || '' }}
submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
- name: Notify PagerDuty
id: pager-duty

View File

@@ -35,8 +35,12 @@ jobs:
issue-type: both
commands: |
ai-canary-prerelease
ai-prove-fix
ai-release-watch
approve-regression-tests
bump-bulk-cdk-version
bump-progressive-rollout-version
bump-version
build-connector-images
connector-performance

View File

@@ -0,0 +1,70 @@
name: Sync Agent Connector Docs
on:
schedule:
- cron: "0 */2 * * *" # Every 2 hours
workflow_dispatch: # Manual trigger
jobs:
sync-docs:
runs-on: ubuntu-latest
steps:
- name: Checkout airbyte repo
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Checkout airbyte-agent-connectors
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
repository: airbytehq/airbyte-agent-connectors
path: agent-connectors-source
- name: Sync connector docs
run: |
DEST_DIR="docs/ai-agents/connectors"
mkdir -p "$DEST_DIR"
for connector_dir in agent-connectors-source/connectors/*/; do
connector=$(basename "$connector_dir")
# Only delete/recreate the specific connector subdirectory
# This leaves any files directly in $DEST_DIR untouched
rm -rf "$DEST_DIR/$connector"
mkdir -p "$DEST_DIR/$connector"
# Copy all markdown files for this connector
for md_file in "$connector_dir"/*.md; do
if [ -f "$md_file" ]; then
cp "$md_file" "$DEST_DIR/$connector/"
fi
done
done
echo "Synced $(ls -d $DEST_DIR/*/ 2>/dev/null | wc -l) connectors"
- name: Cleanup temporary checkout
run: rm -rf agent-connectors-source
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Create PR if changes
uses: peter-evans/create-pull-request@0979079bc20c05bbbb590a56c21c4e2b1d1f1bbe # v6
with:
token: ${{ steps.get-app-token.outputs.token }}
commit-message: "docs: sync agent connector docs from airbyte-agent-connectors repo"
branch: auto-sync-ai-connector-docs
delete-branch: true
title: "docs: sync agent connector docs from airbyte-agent-connectors repo"
body: |
Automated sync of agent connector docs from airbyte-agent-connectors.
This PR was automatically created by the sync-agent-connector-docs workflow.
labels: |
documentation
auto-merge

.markdownlintignore (new file)
View File

@@ -0,0 +1,3 @@
# Ignore auto-generated connector documentation files synced from airbyte-agent-connectors repo
# These files are generated and have formatting that doesn't conform to markdownlint rules
docs/ai-agents/connectors/**

View File

@@ -1,3 +1,34 @@
## Version 0.1.91
load cdk: upsert records test uses proper target schema
## Version 0.1.90
load cdk: components tests: data coercion tests cover all data types
## Version 0.1.89
load cdk: components tests: data coercion tests for int+number
## Version 0.1.88
**Load CDK**
* Add CDC_CURSOR_COLUMN_NAME constant.
## Version 0.1.87
**Load CDK**
* Properly call NamespaceMapper before calculating final table names.
## Version 0.1.86
**Load CDK**
* Adds toFinalSchema "escape hatch" for final table schema munging
* Refactored Component test fixtures to require explicit StreamTableSchema creation using TableSchemaFactory
## Version 0.1.85
**Extract CDK**

View File

@@ -104,17 +104,22 @@ class DefaultDestinationCatalogFactory {
catalog: ConfiguredAirbyteCatalog,
streamFactory: DestinationStreamFactory,
tableNameResolver: TableNameResolver,
namespaceMapper: NamespaceMapper,
): DestinationCatalog {
val descriptors =
catalog.streams
.map { DestinationStream.Descriptor(it.stream.namespace, it.stream.name) }
.toSet()
val names = tableNameResolver.getTableNameMapping(descriptors)
// we resolve the table names with the properly mapped descriptors
val mappedDescriptors =
catalog.streams.map { namespaceMapper.map(it.stream.namespace, it.stream.name) }.toSet()
val names = tableNameResolver.getTableNameMapping(mappedDescriptors)
require(
names.size == catalog.streams.size,
{ "Invariant violation: An incomplete table name mapping was generated." }
)
return DestinationCatalog(
streams =
catalog.streams.map {
val key = DestinationStream.Descriptor(it.stream.namespace, it.stream.name)
val key = namespaceMapper.map(it.stream.namespace, it.stream.name)
streamFactory.make(it, names[key]!!)
}
)

View File

@@ -42,10 +42,13 @@ class TableSchemaFactory(
finalSchema = finalSchema,
)
return StreamTableSchema(
val tableSchema =
StreamTableSchema(
tableNames,
columnSchema,
importType,
)
return mapper.toFinalSchema(tableSchema)
}
}

View File

@@ -7,17 +7,64 @@ package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
/** Transforms input schema elements to destination-specific naming and type conventions. */
interface TableSchemaMapper {
/**
* Converts a stream descriptor to the final destination table name.
*
* @param desc The stream descriptor containing namespace and name information
* @return The mapped final table name in the destination system
*/
fun toFinalTableName(desc: DestinationStream.Descriptor): TableName
/**
* Generates a temporary table name based on the provided final table name. Temporary tables are
* typically used before data is moved to final tables to avoid data downtime.
*
* @param tableName The final table name to base the temporary name on
* @return The temporary table name
*/
fun toTempTableName(tableName: TableName): TableName
/**
* Transforms a column name from the input schema to comply with destination naming conventions.
* This may include handling special characters, case transformations, or length limitations.
*
* @param name The original column name from the input schema
* @return The destination-compatible column name
*/
fun toColumnName(name: String): String
/**
* Converts an Airbyte field type to the corresponding destination-specific column type. This
* handles mapping of data types from Airbyte's type system to the destination database's type
* system.
*
* @param fieldType The Airbyte field type to convert
* @return The destination-specific column type representation
*/
fun toColumnType(fieldType: FieldType): ColumnType
/**
* Performs any final transformations on the complete table schema before it's used in the
* destination. By default, returns the schema unchanged. Override to apply destination-specific
* schema modifications.
*
* @param tableSchema The complete stream table schema
* @return The finalized schema ready for use in the destination
*/
fun toFinalSchema(tableSchema: StreamTableSchema) = tableSchema
/**
* Determines if two column names conflict according to destination-specific rules. By default,
* performs case-insensitive comparison. Override for different conflict detection logic.
*
* @param a First column name
* @param b Second column name
* @return true if the column names conflict, false otherwise
*/
fun colsConflict(a: String, b: String): Boolean = a.equals(b, ignoreCase = true)
}
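
A minimal sketch (not from this changeset) of how a destination might pick up the two new default hooks, `toFinalSchema` and `colsConflict`, without reimplementing the rest of the interface: Kotlin interface delegation wraps an existing mapper and overrides only the hooks. The class name, the wrapped `baseMapper`, and the case-sensitive conflict rule are illustrative assumptions.

```kotlin
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.StreamTableSchema

// Hypothetical destination mapper: delegate naming/typing to an existing mapper,
// override only the new default hooks.
class CaseSensitiveSchemaMapper(
    private val baseMapper: TableSchemaMapper,
) : TableSchemaMapper by baseMapper {

    // "Escape hatch": last chance to adjust the final table schema before it is used.
    // Returned unchanged here; a real destination might tweak columns or types.
    override fun toFinalSchema(tableSchema: StreamTableSchema): StreamTableSchema = tableSchema

    // This destination treats column names as case-sensitive, so only exact matches conflict.
    override fun colsConflict(a: String, b: String): Boolean = a == b
}
```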

View File

@@ -4,4 +4,13 @@
package io.airbyte.cdk.load.table
/**
* CDC meta column names.
*
* Note: These CDC column names are brittle: they are defined separately here, yet coupled to the logic that
* sources use to generate these column names. See
* [io.airbyte.integrations.source.mssql.MsSqlSourceOperations.MsSqlServerCdcMetaFields] for an
* example.
*/
const val CDC_DELETED_AT_COLUMN = "_ab_cdc_deleted_at"
const val CDC_CURSOR_COLUMN = "_ab_cdc_cursor"

View File

@@ -11,6 +11,7 @@ import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.mockk.every
import io.mockk.impl.annotations.MockK
@@ -42,6 +43,7 @@ class TableSchemaFactoryTest {
every { mapper.toTempTableName(finalTableName) } returns tempTableName
every { colNameResolver.getColumnNameMapping(inputSchema.keys) } returns columnNameMapping
every { mapper.toColumnType(any()) } returns ColumnType("test_type", false)
every { mapper.toFinalSchema(any()) } answers { firstArg<StreamTableSchema>() }
val result = factory.make(finalTableName, inputSchema, importType)

View File

@@ -0,0 +1,859 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ArrayValue
import io.airbyte.cdk.load.data.DateValue
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.NumberValue
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.TimeWithTimezoneValue
import io.airbyte.cdk.load.data.TimeWithoutTimezoneValue
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.util.serializeToString
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
import java.math.BigDecimal
import java.math.BigInteger
import java.time.LocalDate
import java.time.LocalDateTime
import java.time.OffsetDateTime
import java.time.format.DateTimeFormatter
import java.time.format.DateTimeFormatterBuilder
import java.time.format.SignStyle
import java.time.temporal.ChronoField
import org.junit.jupiter.params.provider.Arguments
/*
* This file defines "interesting values" for all data types, along with expected behavior for those values.
* You're free to define your own values/behavior depending on the destination, but it's recommended
* that you try to match behavior to an existing fixture.
*
* Classes also include some convenience functions for JUnit. For example, you could annotate your
* method with:
* ```kotlin
* @ParameterizedTest
* @MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int64")
* ```
*
* By convention, all fixtures are declared as:
* 1. One or more `val <name>: List<DataCoercionTestCase>` (each case giving a test name, the input value,
* the expected output value, and an optional change reason)
* 2. One or more `fun <name>(): List<Arguments> = <name>.toArgs()`, which can be provided to JUnit's MethodSource
*
* If you need to mutate fixtures in some way, you should reference the `val`, and use the `toArgs()`
* extension function to convert it to JUnit's Arguments class. See [DataCoercionIntegerFixtures.int64AsBigInteger]
* for an example.
*/
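
As a concrete illustration of that convention (not from this changeset), a destination-specific fixture could look like the sketch below; the object name and boolean cases are made up, while `case`, `toArgs`, and the value types come from this file and the load CDK.

```kotlin
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.NullValue
import org.junit.jupiter.params.provider.Arguments

// Hypothetical fixture: a `val` of test cases plus a @JvmStatic `fun` for JUnit's @MethodSource
// (assumes the same package as DataCoercionFixtures.kt so `case` and `toArgs` resolve).
object MyDestinationBooleanFixtures {
    val booleans: List<DataCoercionTestCase> =
        listOf(
            case("null", NullValue, null),
            case("true", BooleanValue(true), true),
            case("false", BooleanValue(false), false),
        )

    @JvmStatic fun booleans(): List<Arguments> = booleans.toArgs()
}
```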
object DataCoercionIntegerFixtures {
// "9".repeat(38)
val numeric38_0Max = bigint("99999999999999999999999999999999999999")
val numeric38_0Min = bigint("-99999999999999999999999999999999999999")
const val ZERO = "0"
const val ONE = "1"
const val NEGATIVE_ONE = "-1"
const val FORTY_TWO = "42"
const val NEGATIVE_FORTY_TWO = "-42"
const val INT32_MAX = "int32 max"
const val INT32_MIN = "int32 min"
const val INT32_MAX_PLUS_ONE = "int32_max + 1"
const val INT32_MIN_MINUS_ONE = "int32_min - 1"
const val INT64_MAX = "int64 max"
const val INT64_MIN = "int64 min"
const val INT64_MAX_PLUS_ONE = "int64_max + 1"
const val INT64_MIN_MINUS_1 = "int64_min - 1"
const val NUMERIC_38_0_MAX = "numeric(38,0) max"
const val NUMERIC_38_0_MIN = "numeric(38,0) min"
const val NUMERIC_38_0_MAX_PLUS_ONE = "numeric(38,0)_max + 1"
const val NUMERIC_38_0_MIN_MINUS_ONE = "numeric(38,0)_min - 1"
/**
* Many destinations use int64 to represent integers. In this case, we null out any value beyond
* Long.MIN/MAX_VALUE.
*/
val int64 =
listOf(
case(NULL, NullValue, null),
case(ZERO, IntegerValue(0), 0L),
case(ONE, IntegerValue(1), 1L),
case(NEGATIVE_ONE, IntegerValue(-1), -1L),
case(FORTY_TWO, IntegerValue(42), 42L),
case(NEGATIVE_FORTY_TWO, IntegerValue(-42), -42L),
// int32 bounds, and slightly out of bounds
case(INT32_MAX, IntegerValue(Integer.MAX_VALUE.toLong()), Integer.MAX_VALUE.toLong()),
case(INT32_MIN, IntegerValue(Integer.MIN_VALUE.toLong()), Integer.MIN_VALUE.toLong()),
case(
INT32_MAX_PLUS_ONE,
IntegerValue(Integer.MAX_VALUE.toLong() + 1),
Integer.MAX_VALUE.toLong() + 1
),
case(
INT32_MIN_MINUS_ONE,
IntegerValue(Integer.MIN_VALUE.toLong() - 1),
Integer.MIN_VALUE.toLong() - 1
),
// int64 bounds, and slightly out of bounds
case(INT64_MAX, IntegerValue(Long.MAX_VALUE), Long.MAX_VALUE),
case(INT64_MIN, IntegerValue(Long.MIN_VALUE), Long.MIN_VALUE),
// values out of int64 bounds are nulled
case(
INT64_MAX_PLUS_ONE,
IntegerValue(bigint(Long.MAX_VALUE) + BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
INT64_MIN_MINUS_1,
IntegerValue(bigint(Long.MIN_VALUE) - BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
// NUMERIC(38, 0) bounds, and slightly out of bounds
// (these are all out of bounds for an int64 value, so they all get nulled)
case(
NUMERIC_38_0_MAX,
IntegerValue(numeric38_0Max),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_0_MIN,
IntegerValue(numeric38_0Min),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_0_MAX_PLUS_ONE,
IntegerValue(numeric38_0Max + BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_0_MIN_MINUS_ONE,
IntegerValue(numeric38_0Min - BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
)
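
As a worked illustration of the bound this fixture encodes (not from this changeset): an int64-backed destination keeps anything inside `Long.MIN_VALUE..Long.MAX_VALUE` and nulls the rest. The helper name is hypothetical; `bigint` is the constructor defined near the bottom of this file.

```kotlin
import java.math.BigInteger

// Hypothetical helper mirroring the int64 fixture's expectations:
// in-range values survive as Long, out-of-range values become null.
fun coerceToInt64(value: BigInteger): Long? =
    if (value in bigint(Long.MIN_VALUE)..bigint(Long.MAX_VALUE)) value.toLong() else null
```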
/**
* Many destination warehouses represent integers as a fixed-point type with 38 digits of
* precision. In this case, we only need to null out numbers larger than `1e38 - 1` / smaller
* than `-1e38 + 1`.
*/
val numeric38_0 =
listOf(
case(NULL, NullValue, null),
case(ZERO, IntegerValue(0), bigint(0L)),
case(ONE, IntegerValue(1), bigint(1L)),
case(NEGATIVE_ONE, IntegerValue(-1), bigint(-1L)),
case(FORTY_TWO, IntegerValue(42), bigint(42L)),
case(NEGATIVE_FORTY_TWO, IntegerValue(-42), bigint(-42L)),
// int32 bounds, and slightly out of bounds
case(
INT32_MAX,
IntegerValue(Integer.MAX_VALUE.toLong()),
bigint(Integer.MAX_VALUE.toLong())
),
case(
INT32_MIN,
IntegerValue(Integer.MIN_VALUE.toLong()),
bigint(Integer.MIN_VALUE.toLong())
),
case(
INT32_MAX_PLUS_ONE,
IntegerValue(Integer.MAX_VALUE.toLong() + 1),
bigint(Integer.MAX_VALUE.toLong() + 1)
),
case(
INT32_MIN_MINUS_ONE,
IntegerValue(Integer.MIN_VALUE.toLong() - 1),
bigint(Integer.MIN_VALUE.toLong() - 1)
),
// int64 bounds, and slightly out of bounds
case(INT64_MAX, IntegerValue(Long.MAX_VALUE), bigint(Long.MAX_VALUE)),
case(INT64_MIN, IntegerValue(Long.MIN_VALUE), bigint(Long.MIN_VALUE)),
case(
INT64_MAX_PLUS_ONE,
IntegerValue(bigint(Long.MAX_VALUE) + BigInteger.ONE),
bigint(Long.MAX_VALUE) + BigInteger.ONE
),
case(
INT64_MIN_MINUS_1,
IntegerValue(bigint(Long.MIN_VALUE) - BigInteger.ONE),
bigint(Long.MIN_VALUE) - BigInteger.ONE
),
// NUMERIC(38, 0) bounds, and slightly out of bounds
case(NUMERIC_38_0_MAX, IntegerValue(numeric38_0Max), numeric38_0Max),
case(NUMERIC_38_0_MIN, IntegerValue(numeric38_0Min), numeric38_0Min),
// These values exceed the 38-digit range, so they get nulled out
case(
NUMERIC_38_0_MAX_PLUS_ONE,
IntegerValue(numeric38_0Max + BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_0_MIN_MINUS_ONE,
IntegerValue(numeric38_0Min - BigInteger.ONE),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
)
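
Likewise (not from this changeset), the `1e38 - 1` limit described above can be written down directly; the helper names are hypothetical and `bigint` is defined near the bottom of this file.

```kotlin
import java.math.BigInteger

// NUMERIC(38, 0) holds at most 38 digits, i.e. |value| <= 10^38 - 1.
val numeric38_0Bound: BigInteger = bigint("1e38") - BigInteger.ONE

fun fitsNumeric38_0(value: BigInteger): Boolean = value.abs() <= numeric38_0Bound
```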
@JvmStatic fun int64() = int64.toArgs()
/**
* Convenience fixture if your [TestTableOperationsClient] returns integers as [BigInteger]
* rather than [Long].
*/
@JvmStatic
fun int64AsBigInteger() =
int64.map { it.copy(outputValue = it.outputValue?.let { bigint(it as Long) }) }
/**
* Convenience fixture if your [TestTableOperationsClient] returns integers as [BigDecimal]
* rather than [Long].
*/
@JvmStatic
fun int64AsBigDecimal() =
int64.map { it.copy(outputValue = it.outputValue?.let { BigDecimal.valueOf(it as Long) }) }
@JvmStatic fun numeric38_0() = numeric38_0.toArgs()
}
object DataCoercionNumberFixtures {
val numeric38_9Max = bigdec("99999999999999999999999999999.999999999")
val numeric38_9Min = bigdec("-99999999999999999999999999999.999999999")
const val ZERO = "0"
const val ONE = "1"
const val NEGATIVE_ONE = "-1"
const val ONE_HUNDRED_TWENTY_THREE_POINT_FOUR = "123.4"
const val NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR = "-123.4"
const val POSITIVE_HIGH_PRECISION_FLOAT = "positive high-precision float"
const val NEGATIVE_HIGH_PRECISION_FLOAT = "negative high-precision float"
const val NUMERIC_38_9_MAX = "numeric(38,9) max"
const val NUMERIC_38_9_MIN = "numeric(38,9) min"
const val SMALLEST_POSITIVE_FLOAT32 = "smallest positive float32"
const val SMALLEST_NEGATIVE_FLOAT32 = "smallest negative float32"
const val LARGEST_POSITIVE_FLOAT32 = "largest positive float32"
const val LARGEST_NEGATIVE_FLOAT32 = "largest negative float32"
const val SMALLEST_POSITIVE_FLOAT64 = "smallest positive float64"
const val SMALLEST_NEGATIVE_FLOAT64 = "smallest negative float64"
const val LARGEST_POSITIVE_FLOAT64 = "largest positive float64"
const val LARGEST_NEGATIVE_FLOAT64 = "largest negative float64"
const val SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64 = "slightly above largest positive float64"
const val SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64 = "slightly below largest negative float64"
val float64 =
listOf(
case(NULL, NullValue, null),
case(ZERO, NumberValue(bigdec(0)), 0.0),
case(ONE, NumberValue(bigdec(1)), 1.0),
case(NEGATIVE_ONE, NumberValue(bigdec(-1)), -1.0),
// This value isn't exactly representable as a float64
// (the exact value is `123.400000000000005684341886080801486968994140625`)
// but we should preserve the canonical representation
case(ONE_HUNDRED_TWENTY_THREE_POINT_FOUR, NumberValue(bigdec("123.4")), 123.4),
case(
NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
NumberValue(bigdec("-123.4")),
-123.4
),
// These values have too much precision for a float64, so we round them
case(
POSITIVE_HIGH_PRECISION_FLOAT,
NumberValue(bigdec("1234567890.1234567890123456789")),
1234567890.1234567,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NEGATIVE_HIGH_PRECISION_FLOAT,
NumberValue(bigdec("-1234567890.1234567890123456789")),
-1234567890.1234567,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_9_MAX,
NumberValue(numeric38_9Max),
1.0E29,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NUMERIC_38_9_MIN,
NumberValue(numeric38_9Min),
-1.0E29,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
// Float/Double MIN_VALUE and MAX_VALUE are all positive, so we manually test their negative
// counterparts as well
case(
SMALLEST_POSITIVE_FLOAT32,
NumberValue(bigdec(Float.MIN_VALUE.toDouble())),
Float.MIN_VALUE.toDouble()
),
case(
SMALLEST_NEGATIVE_FLOAT32,
NumberValue(bigdec(-Float.MIN_VALUE.toDouble())),
-Float.MIN_VALUE.toDouble()
),
case(
LARGEST_POSITIVE_FLOAT32,
NumberValue(bigdec(Float.MAX_VALUE.toDouble())),
Float.MAX_VALUE.toDouble()
),
case(
LARGEST_NEGATIVE_FLOAT32,
NumberValue(bigdec(-Float.MAX_VALUE.toDouble())),
-Float.MAX_VALUE.toDouble()
),
case(
SMALLEST_POSITIVE_FLOAT64,
NumberValue(bigdec(Double.MIN_VALUE)),
Double.MIN_VALUE
),
case(
SMALLEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MIN_VALUE)),
-Double.MIN_VALUE
),
case(LARGEST_POSITIVE_FLOAT64, NumberValue(bigdec(Double.MAX_VALUE)), Double.MAX_VALUE),
case(
LARGEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MAX_VALUE)),
-Double.MAX_VALUE
),
// These values are out of bounds, so we null them
case(
SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64,
NumberValue(bigdec(Double.MAX_VALUE) + bigdec(Double.MIN_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MAX_VALUE) - bigdec(Double.MIN_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
)
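
The `float64` fixture above distinguishes two outcomes: values within `±Double.MAX_VALUE` are rounded to the nearest double (the high-precision cases), while values strictly outside that range are nulled. A hypothetical helper capturing that rule (not from this changeset; `bigdec` is defined near the bottom of this file):

```kotlin
import java.math.BigDecimal

// Hypothetical: null anything beyond the double range, otherwise round to the nearest double.
fun coerceToFloat64(value: BigDecimal): Double? =
    if (value.abs() > bigdec(Double.MAX_VALUE)) null else value.toDouble()
```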
val numeric38_9 =
listOf(
case(NULL, NullValue, null),
case(ZERO, NumberValue(bigdec(0)), bigdec(0.0)),
case(ONE, NumberValue(bigdec(1)), bigdec(1.0)),
case(NEGATIVE_ONE, NumberValue(bigdec(-1)), bigdec(-1.0)),
// This value isn't exactly representable as a float64
// (the exact value is `123.400000000000005684341886080801486968994140625`)
// but it's perfectly fine as a numeric(38, 9)
case(
ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
NumberValue(bigdec("123.4")),
bigdec("123.4")
),
case(
NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
NumberValue(bigdec("-123.4")),
bigdec("-123.4")
),
// These values have too much precision for a numeric(38, 9), so we round them
case(
POSITIVE_HIGH_PRECISION_FLOAT,
NumberValue(bigdec("1234567890.1234567890123456789")),
bigdec("1234567890.123456789"),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
NEGATIVE_HIGH_PRECISION_FLOAT,
NumberValue(bigdec("-1234567890.1234567890123456789")),
bigdec("-1234567890.123456789"),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SMALLEST_POSITIVE_FLOAT32,
NumberValue(bigdec(Float.MIN_VALUE.toDouble())),
bigdec(0),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SMALLEST_NEGATIVE_FLOAT32,
NumberValue(bigdec(-Float.MIN_VALUE.toDouble())),
bigdec(0),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SMALLEST_POSITIVE_FLOAT64,
NumberValue(bigdec(Double.MIN_VALUE)),
bigdec(0),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SMALLEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MIN_VALUE)),
bigdec(0),
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
// numeric bounds are perfectly fine
case(NUMERIC_38_9_MAX, NumberValue(numeric38_9Max), numeric38_9Max),
case(NUMERIC_38_9_MIN, NumberValue(numeric38_9Min), numeric38_9Min),
// These values are out of bounds, so we null them
case(
LARGEST_POSITIVE_FLOAT32,
NumberValue(bigdec(Float.MAX_VALUE.toDouble())),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
LARGEST_NEGATIVE_FLOAT32,
NumberValue(bigdec(-Float.MAX_VALUE.toDouble())),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
LARGEST_POSITIVE_FLOAT64,
NumberValue(bigdec(Double.MAX_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
LARGEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MAX_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64,
NumberValue(bigdec(Double.MAX_VALUE) + bigdec(Double.MIN_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64,
NumberValue(bigdec(-Double.MAX_VALUE) - bigdec(Double.MIN_VALUE)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
)
.map { it.copy(outputValue = (it.outputValue as BigDecimal?)?.setScale(9)) }
@JvmStatic fun float64() = float64.toArgs()
@JvmStatic fun numeric38_9() = numeric38_9.toArgs()
}
const val SIMPLE_TIMESTAMP = "simple timestamp"
const val UNIX_EPOCH = "unix epoch"
const val MINIMUM_TIMESTAMP = "minimum timestamp"
const val MAXIMUM_TIMESTAMP = "maximum timestamp"
const val OUT_OF_RANGE_TIMESTAMP = "out of range timestamp"
const val HIGH_PRECISION_TIMESTAMP = "high-precision timestamp"
object DataCoercionTimestampTzFixtures {
/**
* Many warehouses support timestamps between years 0001 - 9999.
*
* Depending on the exact warehouse, you may need to tweak the precision on some values. For
* example, Snowflake supports nanosecond-precision timestamps (9 decimal places), but BigQuery
* only supports microsecond precision (6 decimal places). BigQuery would probably do something
* like:
* ```kotlin
* DataCoercionTimestampTzFixtures.commonWarehouse
* .map {
* when (it.name) {
* "maximum timestamp" -> it.copy(
* inputValue = TimestampWithTimezoneValue("9999-12-31T23:59:59.999999Z"),
* outputValue = OffsetDateTime.parse("9999-12-31T23:59:59.999999Z"),
* changeReason = Reason.DESTINATION_FIELD_SIZE_LIMITATION,
* )
* "high-precision timestamp" -> it.copy(
* outputValue = OffsetDateTime.parse("2025-01-23T01:01:00.123456Z"),
* changeReason = Reason.DESTINATION_FIELD_SIZE_LIMITATION,
* )
* else -> it
* }
* }
* ```
*/
val commonWarehouse =
listOf(
case(NULL, NullValue, null),
case(
SIMPLE_TIMESTAMP,
TimestampWithTimezoneValue("2025-01-23T12:34:56.789Z"),
"2025-01-23T12:34:56.789Z",
),
case(
UNIX_EPOCH,
TimestampWithTimezoneValue("1970-01-01T00:00:00Z"),
"1970-01-01T00:00:00Z",
),
case(
MINIMUM_TIMESTAMP,
TimestampWithTimezoneValue("0001-01-01T00:00:00Z"),
"0001-01-01T00:00:00Z",
),
case(
MAXIMUM_TIMESTAMP,
TimestampWithTimezoneValue("9999-12-31T23:59:59.999999999Z"),
"9999-12-31T23:59:59.999999999Z",
),
case(
OUT_OF_RANGE_TIMESTAMP,
TimestampWithTimezoneValue(odt("10000-01-01T00:00Z")),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
),
case(
HIGH_PRECISION_TIMESTAMP,
TimestampWithTimezoneValue("2025-01-23T01:01:00.123456789Z"),
"2025-01-23T01:01:00.123456789Z",
),
)
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
}
object DataCoercionTimestampNtzFixtures {
/** See [DataCoercionTimestampTzFixtures.commonWarehouse] for explanation */
val commonWarehouse =
listOf(
case(NULL, NullValue, null),
case(
SIMPLE_TIMESTAMP,
TimestampWithoutTimezoneValue("2025-01-23T12:34:56.789"),
"2025-01-23T12:34:56.789",
),
case(
UNIX_EPOCH,
TimestampWithoutTimezoneValue("1970-01-01T00:00:00"),
"1970-01-01T00:00:00",
),
case(
MINIMUM_TIMESTAMP,
TimestampWithoutTimezoneValue("0001-01-01T00:00:00"),
"0001-01-01T00:00:00",
),
case(
MAXIMUM_TIMESTAMP,
TimestampWithoutTimezoneValue("9999-12-31T23:59:59.999999999"),
"9999-12-31T23:59:59.999999999",
),
case(
OUT_OF_RANGE_TIMESTAMP,
TimestampWithoutTimezoneValue(ldt("10000-01-01T00:00")),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
),
case(
HIGH_PRECISION_TIMESTAMP,
TimestampWithoutTimezoneValue("2025-01-23T01:01:00.123456789"),
"2025-01-23T01:01:00.123456789",
),
)
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
}
const val MIDNIGHT = "midnight"
const val MAX_TIME = "max time"
const val HIGH_NOON = "high noon"
object DataCoercionTimeTzFixtures {
val timetz =
listOf(
case(NULL, NullValue, null),
case(MIDNIGHT, TimeWithTimezoneValue("00:00Z"), "00:00Z"),
case(MAX_TIME, TimeWithTimezoneValue("23:59:59.999999999Z"), "23:59:59.999999999Z"),
case(HIGH_NOON, TimeWithTimezoneValue("12:00Z"), "12:00Z"),
)
@JvmStatic fun timetz() = timetz.toArgs()
}
object DataCoercionTimeNtzFixtures {
val timentz =
listOf(
case(NULL, NullValue, null),
case(MIDNIGHT, TimeWithoutTimezoneValue("00:00"), "00:00"),
case(MAX_TIME, TimeWithoutTimezoneValue("23:59:59.999999999"), "23:59:59.999999999"),
case(HIGH_NOON, TimeWithoutTimezoneValue("12:00"), "12:00"),
)
@JvmStatic fun timentz() = timentz.toArgs()
}
object DataCoercionDateFixtures {
val commonWarehouse =
listOf(
case(NULL, NullValue, null),
case(
SIMPLE_TIMESTAMP,
DateValue("2025-01-23"),
"2025-01-23",
),
case(
UNIX_EPOCH,
DateValue("1970-01-01"),
"1970-01-01",
),
case(
MINIMUM_TIMESTAMP,
DateValue("0001-01-01"),
"0001-01-01",
),
case(
MAXIMUM_TIMESTAMP,
DateValue("9999-12-31"),
"9999-12-31",
),
case(
OUT_OF_RANGE_TIMESTAMP,
DateValue(date("10000-01-01")),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
),
)
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
}
object DataCoercionStringFixtures {
const val EMPTY_STRING = "empty string"
const val SHORT_STRING = "short string"
const val LONG_STRING = "long string"
const val SPECIAL_CHARS_STRING = "special chars string"
val strings =
listOf(
case(NULL, NullValue, null),
case(EMPTY_STRING, StringValue(""), ""),
case(SHORT_STRING, StringValue("foo"), "foo"),
// Implementers may override this to test their destination-specific limits.
// The default value is 16MB + 1 byte (slightly longer than Snowflake's varchar limit).
case(
LONG_STRING,
StringValue("a".repeat(16777216 + 1)),
null,
Reason.DESTINATION_FIELD_SIZE_LIMITATION
),
case(
SPECIAL_CHARS_STRING,
StringValue("`~!@#$%^&*()-=_+[]\\{}|o'O\",./<>?)Δ⅀↑∀"),
"`~!@#$%^&*()-=_+[]\\{}|o'O\",./<>?)Δ⅀↑∀"
),
)
@JvmStatic fun strings() = strings.toArgs()
}
object DataCoercionObjectFixtures {
const val EMPTY_OBJECT = "empty object"
const val NORMAL_OBJECT = "normal object"
val objects =
listOf(
case(NULL, NullValue, null),
case(EMPTY_OBJECT, ObjectValue(linkedMapOf()), emptyMap<String, Any?>()),
case(
NORMAL_OBJECT,
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
mapOf("foo" to "bar")
),
)
val stringifiedObjects =
objects.map { fixture ->
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
}
@JvmStatic fun objects() = objects.toArgs()
@JvmStatic fun stringifiedObjects() = stringifiedObjects.toArgs()
}
object DataCoercionArrayFixtures {
const val EMPTY_ARRAY = "empty array"
const val NORMAL_ARRAY = "normal array"
val arrays =
listOf(
case(NULL, NullValue, null),
case(EMPTY_ARRAY, ArrayValue(emptyList()), emptyList<Any?>()),
case(NORMAL_ARRAY, ArrayValue(listOf(StringValue("foo"))), listOf("foo")),
)
val stringifiedArrays =
arrays.map { fixture ->
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
}
@JvmStatic fun arrays() = arrays.toArgs()
@JvmStatic fun stringifiedArrays() = stringifiedArrays.toArgs()
}
const val UNION_INT_VALUE = "int value"
const val UNION_OBJ_VALUE = "object value"
const val UNION_STR_VALUE = "string value"
object DataCoercionUnionFixtures {
val unions =
listOf(
case(NULL, NullValue, null),
case(UNION_INT_VALUE, IntegerValue(42), 42L),
case(UNION_STR_VALUE, StringValue("foo"), "foo"),
case(
UNION_OBJ_VALUE,
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
mapOf("foo" to "bar")
),
)
val stringifiedUnions =
unions.map { fixture ->
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
}
@JvmStatic fun unions() = unions.toArgs()
@JvmStatic fun stringifiedUnions() = stringifiedUnions.toArgs()
}
object DataCoercionLegacyUnionFixtures {
val unions =
listOf(
case(NULL, NullValue, null),
// Legacy union of int x object will select object, and you can't write an int to an
// object column.
// So we should null it out.
case(UNION_INT_VALUE, IntegerValue(42), null, Reason.DESTINATION_TYPECAST_ERROR),
// Similarly, strings can't be written to an object column, so we null them out too.
case(UNION_STR_VALUE, StringValue("foo"), null, Reason.DESTINATION_TYPECAST_ERROR),
// But objects can be written as objects, so retain this value.
case(
UNION_OBJ_VALUE,
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
mapOf("foo" to "bar")
),
)
val stringifiedUnions =
DataCoercionUnionFixtures.unions.map { fixture ->
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
}
@JvmStatic fun unions() = unions.toArgs()
@JvmStatic fun stringifiedUnions() = DataCoercionUnionFixtures.stringifiedUnions.toArgs()
}
// This is pretty much identical to UnionFixtures, but separating them in case we need to add
// different test cases for either of them.
object DataCoercionUnknownFixtures {
const val INT_VALUE = "integer value"
const val STR_VALUE = "string value"
const val OBJ_VALUE = "object value"
val unknowns =
listOf(
case(NULL, NullValue, null),
case(INT_VALUE, IntegerValue(42), 42L),
case(STR_VALUE, StringValue("foo"), "foo"),
case(
OBJ_VALUE,
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
mapOf("foo" to "bar")
),
)
val stringifiedUnknowns =
unknowns.map { fixture ->
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
}
@JvmStatic fun unknowns() = unknowns.toArgs()
@JvmStatic fun stringifiedUnknowns() = stringifiedUnknowns.toArgs()
}
fun List<DataCoercionTestCase>.toArgs(): List<Arguments> =
this.map { Arguments.argumentSet(it.name, it.inputValue, it.outputValue, it.changeReason) }
.toList()
/**
* Utility method to use the BigDecimal constructor (supports exponential notation like `1e38`) to
* construct a BigInteger.
*/
fun bigint(str: String): BigInteger = BigDecimal(str).toBigIntegerExact()
/** Shorthand utility method to construct a bigint from a long */
fun bigint(long: Long): BigInteger = BigInteger.valueOf(long)
fun bigdec(str: String): BigDecimal = BigDecimal(str)
fun bigdec(double: Double): BigDecimal = BigDecimal.valueOf(double)
fun bigdec(int: Int): BigDecimal = BigDecimal.valueOf(int.toDouble())
fun odt(str: String): OffsetDateTime = OffsetDateTime.parse(str, dateTimeFormatter)
fun ldt(str: String): LocalDateTime = LocalDateTime.parse(str, dateTimeFormatter)
fun date(str: String): LocalDate = LocalDate.parse(str, dateFormatter)
// The default java.time.*.parse() behavior only accepts up to 4-digit years.
// Build a custom formatter to handle larger years.
val dateFormatter =
DateTimeFormatterBuilder()
// java.time.* supports up to 9-digit years
.appendValue(ChronoField.YEAR, 1, 9, SignStyle.NORMAL)
.appendLiteral('-')
.appendValue(ChronoField.MONTH_OF_YEAR)
.appendLiteral('-')
.appendValue(ChronoField.DAY_OF_MONTH)
.toFormatter()
val dateTimeFormatter =
DateTimeFormatterBuilder()
.append(dateFormatter)
.appendLiteral('T')
// Accepts strings with/without an offset, so we can use this formatter
// for both timestamp with and without timezone
.append(DateTimeFormatter.ISO_TIME)
.toFormatter()
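
A quick worked example (not from this changeset) of why these custom formatters exist: plain ISO parsing rejects 5-digit years unless they carry a leading `+`, while the helpers above accept the out-of-range years used by the fixtures.

```kotlin
fun main() {
    // Plain ISO parsing rejects a 5-digit year without a '+' sign...
    runCatching { java.time.LocalDate.parse("10000-01-01") }
        .onFailure { println("ISO parse failed: ${it.message}") }
    // ...while the fixture helpers parse it fine.
    println(date("10000-01-01"))       // +10000-01-01
    println(odt("10000-01-01T00:00Z")) // +10000-01-01T00:00Z
}
```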
/**
* Represents a single data coercion test case. You probably want to use [case] as a shorthand
* constructor.
*
* @param name A short human-readable name for the test. Primarily useful for tests where
* [inputValue] is either very long, or otherwise hard to read.
* @param inputValue The value to pass into [ValueCoercer.validate]
* @param outputValue The value that we expect to read back from the destination. Should be
* basically equivalent to the output of [ValueCoercer.validate]
* @param changeReason If `validate` returns Truncate/Nullify, the reason for that
* truncation/nullification. If `validate` returns Valid, this should be null.
*/
data class DataCoercionTestCase(
val name: String,
val inputValue: AirbyteValue,
val outputValue: Any?,
val changeReason: Reason? = null,
)
fun case(
name: String,
inputValue: AirbyteValue,
outputValue: Any?,
changeReason: Reason? = null,
) = DataCoercionTestCase(name, inputValue, outputValue, changeReason)
const val NULL = "null"

View File

@@ -0,0 +1,369 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import kotlinx.coroutines.test.runTest
/**
* The tests in this class are designed to reference the parameters defined in
* `DataCoercionFixtures.kt`. For example, you might annotate [`handle integer values`] with
* `@MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int32")`. See each
* fixture class for explanations of what behavior they are exercising.
*
* Note that this class _only_ exercises [ValueCoercer.validate]. You should write separate unit
* tests for [ValueCoercer.map]. For now, the `map` function is primarily intended for transforming
* `UnionType` fields into other types (typically `StringType`), at which point your `validate`
* implementation should be able to handle any StringValue (regardless of whether it was originally
* a StringType or UnionType).
*/
@MicronautTest(environments = ["component"], resolveParameters = false)
interface DataCoercionSuite {
val coercer: ValueCoercer
val airbyteMetaColumnMapping: Map<String, String>
get() = Meta.COLUMN_NAMES.associateWith { it }
val columnNameMapping: ColumnNameMapping
get() = ColumnNameMapping(mapOf("test" to "test"))
val opsClient: TableOperationsClient
val testClient: TestTableOperationsClient
val schemaFactory: TableSchemaFactory
val harness: TableOperationsTestHarness
get() =
TableOperationsTestHarness(
opsClient,
testClient,
schemaFactory,
airbyteMetaColumnMapping
)
/** Fixtures are defined in [DataCoercionIntegerFixtures]. */
fun `handle integer values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(IntegerType, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionNumberFixtures]. */
fun `handle number values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(NumberType, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionTimestampTzFixtures]. */
fun `handle timestamptz values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(TimestampTypeWithTimezone, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionTimestampNtzFixtures]. */
fun `handle timestampntz values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(TimestampTypeWithoutTimezone, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionTimeTzFixtures]. */
fun `handle timetz values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(TimeTypeWithTimezone, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionTimeNtzFixtures]. */
fun `handle timentz values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(TimeTypeWithoutTimezone, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionDateFixtures]. */
fun `handle date values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(DateType, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** No fixtures, hardcoded to just write `true` */
fun `handle bool values`(expectedValue: Any?) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(BooleanType, nullable = true),
// Just test on `true` and assume `false` also works
BooleanValue(true),
expectedValue,
// If your destination is nulling/truncating booleans... that's almost definitely a bug
expectedChangeReason = null,
)
}
/** Fixtures are defined in [DataCoercionStringFixtures]. */
fun `handle string values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(StringType, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
fun `handle object values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
nullable = true
),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
fun `handle empty object values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(ObjectTypeWithEmptySchema, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
fun `handle schemaless object values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(ObjectTypeWithoutSchema, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionArrayFixtures]. */
fun `handle array values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(ArrayType(FieldType(StringType, true)), nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/** Fixtures are defined in [DataCoercionArrayFixtures]. */
fun `handle schemaless array values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(ArrayTypeWithoutSchema, nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/**
* All destinations should implement this, even if your destination supports legacy unions.
*
* Fixtures are defined in [DataCoercionUnionFixtures].
*/
fun `handle union values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(
UnionType(
setOf(
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
IntegerType,
StringType,
),
isLegacyUnion = false
),
nullable = true
),
inputValue,
expectedValue,
expectedChangeReason,
)
}
/**
* Only legacy destinations that maintain "legacy" union behavior should implement this
* test. If you're not sure, check whether your `application-connector.yaml` includes an
* `airbyte.destination.core.types.unions: LEGACY` property.
*
* Fixtures are defined in [DataCoercionLegacyUnionFixtures].
*/
fun `handle legacy union values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(
UnionType(
setOf(
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
IntegerType,
StringType,
),
isLegacyUnion = true
),
nullable = true
),
inputValue,
expectedValue,
expectedChangeReason,
)
}
fun `handle unknown values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) = runTest {
harness.testValueCoercion(
coercer,
columnNameMapping,
FieldType(UnknownType(Jsons.readTree(("""{"type": "potato"}"""))), nullable = true),
inputValue,
expectedValue,
expectedChangeReason,
)
}
}
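// A hedged sketch of how a destination might wire this suite up (class and injection details
// are illustrative, not prescribed by the CDK): implement the interface, provide the
// connector's ValueCoercer and table clients, and point each test at the matching fixture:
//   class MyDestinationDataCoercionTest(
//       override val coercer: ValueCoercer,
//       override val opsClient: TableOperationsClient,
//       override val testClient: TestTableOperationsClient,
//       override val schemaFactory: TableSchemaFactory,
//   ) : DataCoercionSuite {
//       @ParameterizedTest
//       @MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int32")
//       override fun `handle integer values`(
//           inputValue: AirbyteValue,
//           expectedValue: Any?,
//           expectedChangeReason: Reason?
//       ) = super.`handle integer values`(inputValue, expectedValue, expectedChangeReason)
//   }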

View File

@@ -4,10 +4,7 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ArrayType
@@ -26,15 +23,14 @@ import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.util.Jsons
@@ -89,6 +85,18 @@ object TableOperationsFixtures {
"array" to FieldType(ArrayType(FieldType(StringType, true)), true),
"object" to
FieldType(ObjectType(linkedMapOf("key" to FieldType(StringType, true))), true),
"union" to
FieldType(
UnionType(setOf(StringType, IntegerType), isLegacyUnion = false),
true
),
// Most destinations just ignore the isLegacyUnion flag, which is totally fine.
// This is here for the small set of connectors that respect it.
"legacy_union" to
FieldType(
UnionType(setOf(StringType, IntegerType), isLegacyUnion = true),
true
),
"unknown" to FieldType(UnknownType(Jsons.readTree("""{"type": "potato"}""")), true),
),
)
@@ -106,6 +114,8 @@ object TableOperationsFixtures {
"time_ntz" to "time_ntz",
"array" to "array",
"object" to "object",
"union" to "union",
"legacy_union" to "legacy_union",
"unknown" to "unknown",
)
)
@@ -678,105 +688,24 @@ object TableOperationsFixtures {
}
// Create common destination stream configurations
fun createAppendStream(
namespace: String,
name: String,
schema: ObjectType,
generationId: Long = 1,
minimumGenerationId: Long = 0,
syncId: Long = 1,
): DestinationStream =
DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType = Append,
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId = syncId,
schema = schema,
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames = TableNames(finalTableName = TableName(namespace, name)),
columnSchema =
ColumnSchema(
inputSchema = schema.properties,
inputToFinalColumnNames = schema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = Append,
)
)
fun createDedupeStream(
namespace: String,
name: String,
schema: ObjectType,
primaryKey: List<List<String>>,
cursor: List<String>,
generationId: Long = 1,
minimumGenerationId: Long = 0,
syncId: Long = 1,
): DestinationStream =
DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType =
Dedupe(
primaryKey = primaryKey,
cursor = cursor,
),
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId = syncId,
schema = schema,
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames = TableNames(finalTableName = TableName(namespace, name)),
columnSchema =
ColumnSchema(
inputSchema = schema.properties,
inputToFinalColumnNames = schema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType =
Dedupe(
primaryKey = primaryKey,
cursor = cursor,
),
)
)
fun createStream(
namespace: String,
name: String,
schema: ObjectType,
importType: ImportType,
tableSchema: StreamTableSchema,
generationId: Long = 1,
minimumGenerationId: Long = 0,
syncId: Long = 1,
) =
): DestinationStream =
DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType = importType,
importType = tableSchema.importType,
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId = syncId,
schema = schema,
schema = ObjectType(LinkedHashMap(tableSchema.columnSchema.inputSchema)),
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
columnSchema =
ColumnSchema(
inputSchema = schema.properties,
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = importType,
)
tableSchema = tableSchema,
)
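// Typical usage (sketch, mirroring the component tests): build the StreamTableSchema first
// and derive the stream from it, e.g.
//   createStream(table.namespace, table.name, schemaFactory.make(table, schema.properties, Append))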
fun <V> List<Map<String, V>>.sortBy(key: String) =
@@ -800,6 +729,11 @@ object TableOperationsFixtures {
return map { record -> record.mapKeys { (k, _) -> totalMapping.invert()[k] ?: k } }
}
fun <V> List<Map<String, V>>.removeAirbyteColumns(
airbyteMetaColumnMapping: Map<String, String>
): List<Map<String, V>> =
this.map { rec -> rec.filter { !airbyteMetaColumnMapping.containsValue(it.key) } }
fun <V> List<Map<String, V>>.removeNulls() =
this.map { record -> record.filterValues { it != null } }
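// Illustration (assuming "_airbyte_raw_id" is among the mapping's values): a row like
// mapOf("_airbyte_raw_id" to "x", "id" to 1, "name" to null) becomes mapOf("id" to 1)
// after removeAirbyteColumns(mapping) followed by removeNulls().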

View File

@@ -4,6 +4,8 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
@@ -18,6 +20,7 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import kotlinx.coroutines.test.runTest
@@ -48,12 +51,15 @@ interface TableOperationsSuite {
/** The database client instance to test. Must be properly configured and connected. */
val client: TableOperationsClient
val testClient: TestTableOperationsClient
val schemaFactory: TableSchemaFactory
// since ColumnNameMapping doesn't include the airbyte columns...
val airbyteMetaColumnMapping: Map<String, String>
get() = Meta.COLUMN_NAMES.associateWith { it }
private val harness: TableOperationsTestHarness
get() = TableOperationsTestHarness(client, testClient, airbyteMetaColumnMapping)
get() =
TableOperationsTestHarness(client, testClient, schemaFactory, airbyteMetaColumnMapping)
/** Tests basic database connectivity by pinging the database. */
fun `connect to database`() = runTest { assertDoesNotThrow { testClient.ping() } }
@@ -84,16 +90,19 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
try {
client.createTable(
tableName = testTable,
columnNameMapping = Fixtures.TEST_MAPPING,
stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.TEST_INTEGER_SCHEMA,
tableSchema = tableSchema,
),
replace = false,
)
@@ -129,11 +138,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("insert-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
testClient.insertRecords(testTable, inputRecords, columnNameMapping)
@@ -142,7 +160,7 @@ interface TableOperationsSuite {
assertEquals(
expectedRecords,
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping),
)
} finally {
harness.cleanupTable(testTable)
@@ -174,11 +192,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("count-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
val records1 =
@@ -322,11 +349,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("gen-id-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
val genId = 17L
@@ -382,18 +418,36 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
harness.assertTableDoesNotExist(targetTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
tableSchema = sourceTableSchema,
)
val targetTableSchema =
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val targetStream =
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
sourceTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = sourceTable,
columnNameMapping = columnNameMapping,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
harness.createTestTableAndVerifyExists(
targetTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = targetTable,
columnNameMapping = columnNameMapping,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -405,7 +459,7 @@ interface TableOperationsSuite {
expectedRecords,
overwrittenTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping
airbyteMetaColumnMapping,
),
"test",
"Expected records were not in the overwritten table.",
@@ -454,18 +508,36 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
harness.assertTableDoesNotExist(targetTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
tableSchema = sourceTableSchema,
)
val targetTableSchema =
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val targetStream =
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
sourceTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = sourceTable,
columnNameMapping = columnNameMapping,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
harness.createTestTableAndVerifyExists(
targetTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = targetTable,
columnNameMapping = columnNameMapping,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -477,10 +549,10 @@ interface TableOperationsSuite {
expectedRecords,
copyTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping
airbyteMetaColumnMapping,
),
"test",
"Expected source records were not copied to the target table."
"Expected source records were not copied to the target table.",
)
} finally {
harness.cleanupTable(sourceTable)
@@ -520,31 +592,38 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
tableSchema = sourceTableSchema,
)
val targetTable = Fixtures.generateTestTableName("upsert-test-target-table", testNamespace)
harness.assertTableDoesNotExist(targetTable)
val targetStream =
Fixtures.createDedupeStream(
namespace = targetTable.namespace,
name = targetTable.name,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
val targetTableSchema =
schemaFactory.make(
targetTable,
Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties,
Dedupe(
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
cursor = listOf(Fixtures.TEST_FIELD),
),
)
val targetStream =
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = sourceTable,
columnNameMapping = columnNameMapping,
schema = Fixtures.ID_AND_TEST_SCHEMA,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
@@ -552,7 +631,6 @@ interface TableOperationsSuite {
harness.createTestTableAndVerifyExists(
tableName = targetTable,
columnNameMapping = columnNameMapping,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -565,10 +643,10 @@ interface TableOperationsSuite {
expectedRecords,
upsertTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping
airbyteMetaColumnMapping,
),
"id",
"Upserted table did not contain expected records."
"Upserted table did not contain expected records.",
)
} finally {
harness.cleanupTable(sourceTable)

View File

@@ -4,13 +4,24 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.TableOperationsFixtures.createAppendStream
import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
import io.airbyte.cdk.load.component.TableOperationsFixtures.removeAirbyteColumns
import io.airbyte.cdk.load.component.TableOperationsFixtures.removeNulls
import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.EnrichedAirbyteValue
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.NullValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
import io.github.oshai.kotlinlogging.KotlinLogging
import org.junit.jupiter.api.Assertions.assertEquals
@@ -23,20 +34,15 @@ private val log = KotlinLogging.logger {}
class TableOperationsTestHarness(
private val client: TableOperationsClient,
private val testClient: TestTableOperationsClient,
private val schemaFactory: TableSchemaFactory,
private val airbyteMetaColumnMapping: Map<String, String>,
) {
/** Creates a test table with the given configuration and verifies it was created. */
suspend fun createTestTableAndVerifyExists(
tableName: TableName,
schema: ObjectType,
columnNameMapping: ColumnNameMapping,
stream: DestinationStream =
createAppendStream(
namespace = tableName.namespace,
name = tableName.name,
schema = schema,
)
stream: DestinationStream
) {
client.createTable(
stream = stream,
@@ -108,8 +114,77 @@ class TableOperationsTestHarness(
/** Reads records from a table, filtering out Meta columns. */
suspend fun readTableWithoutMetaColumns(tableName: TableName): List<Map<String, Any>> {
val tableRead = testClient.readTable(tableName)
return tableRead.map { rec ->
rec.filter { !airbyteMetaColumnMapping.containsValue(it.key) }
return tableRead.removeAirbyteColumns(airbyteMetaColumnMapping)
}
/** Apply the coercer to a value and verify that we can write the coerced value correctly */
suspend fun testValueCoercion(
coercer: ValueCoercer,
columnNameMapping: ColumnNameMapping,
fieldType: FieldType,
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?,
) {
val testNamespace = TableOperationsFixtures.generateTestNamespace("test")
val tableName =
TableOperationsFixtures.generateTestTableName("table-test-table", testNamespace)
val schema = ObjectType(linkedMapOf("test" to fieldType))
val tableSchema = schemaFactory.make(tableName, schema.properties, Append)
val stream =
TableOperationsFixtures.createStream(
namespace = tableName.namespace,
name = tableName.name,
tableSchema = tableSchema,
)
val inputValueAsEnrichedAirbyteValue =
EnrichedAirbyteValue(
inputValue,
fieldType.type,
"test",
airbyteMetaField = null,
)
val validatedValue = coercer.validate(inputValueAsEnrichedAirbyteValue)
val valueToInsert: AirbyteValue
val changeReason: Reason?
when (validatedValue) {
is ValidationResult.ShouldNullify -> {
valueToInsert = NullValue
changeReason = validatedValue.reason
}
is ValidationResult.ShouldTruncate -> {
valueToInsert = validatedValue.truncatedValue
changeReason = validatedValue.reason
}
ValidationResult.Valid -> {
valueToInsert = inputValue
changeReason = null
}
}
client.createNamespace(testNamespace)
client.createTable(stream, tableName, columnNameMapping, replace = false)
testClient.insertRecords(
tableName,
columnNameMapping,
inputRecord("test" to valueToInsert),
)
val actualRecords =
testClient
.readTable(tableName)
.removeAirbyteColumns(airbyteMetaColumnMapping)
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
.removeNulls()
val actualValue = actualRecords.first()["test"]
assertEquals(
expectedValue,
actualValue,
"For input $inputValue, expected ${expectedValue.simpleClassName()}; actual value was ${actualValue.simpleClassName()}. Coercer output was $validatedValue.",
)
assertEquals(expectedChangeReason, changeReason)
}
}
fun Any?.simpleClassName() = this?.let { it::class.simpleName } ?: "null"

View File

@@ -24,6 +24,7 @@ import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
@@ -40,9 +41,16 @@ interface TableSchemaEvolutionSuite {
val opsClient: TableOperationsClient
val testClient: TestTableOperationsClient
val schemaFactory: TableSchemaFactory
private val harness: TableOperationsTestHarness
get() = TableOperationsTestHarness(opsClient, testClient, airbyteMetaColumnMapping)
get() =
TableOperationsTestHarness(
opsClient,
testClient,
schemaFactory,
airbyteMetaColumnMapping
)
/**
* Test that the connector can correctly discover all of its own data types. This test creates a
@@ -61,11 +69,13 @@ interface TableSchemaEvolutionSuite {
) = runTest {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
val tableSchema =
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
val stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.ALL_TYPES_SCHEMA,
tableSchema = tableSchema,
)
opsClient.createNamespace(testNamespace)
@@ -97,11 +107,13 @@ interface TableSchemaEvolutionSuite {
) {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
val tableSchema =
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
val stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.ALL_TYPES_SCHEMA,
tableSchema = tableSchema,
)
val computedSchema = client.computeSchema(stream, columnNameMapping)
assertEquals(expectedComputedSchema, computedSchema)
@@ -374,12 +386,13 @@ interface TableSchemaEvolutionSuite {
"to_drop" to FieldType(StringType, true),
),
)
val initialTableSchema =
schemaFactory.make(testTable, initialSchema.properties, initialStreamImportType)
val initialStream =
Fixtures.createStream(
testTable.namespace,
testTable.name,
initialSchema,
initialStreamImportType,
initialTableSchema,
)
val modifiedSchema =
ObjectType(
@@ -391,12 +404,13 @@ interface TableSchemaEvolutionSuite {
"to_add" to FieldType(StringType, true),
),
)
val modifiedTableSchema =
schemaFactory.make(testTable, modifiedSchema.properties, modifiedStreamImportType)
val modifiedStream =
Fixtures.createStream(
testTable.namespace,
testTable.name,
modifiedSchema,
modifiedStreamImportType,
modifiedTableSchema,
)
// Create the table and compute the schema changeset
@@ -548,16 +562,16 @@ interface TableSchemaEvolutionSuite {
modifiedSchema: ObjectType,
modifiedColumnNameMapping: ColumnNameMapping,
initialStream: DestinationStream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = initialSchema,
tableSchema = schemaFactory.make(testTable, initialSchema.properties, Append),
),
modifiedStream: DestinationStream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = modifiedSchema,
tableSchema = schemaFactory.make(testTable, modifiedSchema.properties, Append),
),
): SchemaEvolutionComputation {
opsClient.createNamespace(testTable.namespace)

View File

@@ -1 +1 @@
version=0.1.85
version=0.1.91

View File

@@ -10,5 +10,6 @@ CONNECTOR_PATH_PREFIXES = {
"airbyte-integrations/connectors",
"docs/integrations/sources",
"docs/integrations/destinations",
"docs/ai-agents/connectors",
}
MERGE_METHOD = "squash"

View File

@@ -75,7 +75,7 @@ This will copy the specified connector version to your development bucket. This
_💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage/docs/gsutil) installed and have run `gsutil auth login`_
```bash
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe copy-connector-from-prod
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-preview.ea013c8" poetry run poe copy-connector-from-prod
```
### Promote Connector Version to Latest
@@ -87,5 +87,5 @@ _💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage
_⚠️ Warning: It's important to know that this will remove ANY existing files in the latest folder that are not in the versioned folder, as it calls `gsutil rsync` with `-d` enabled._
```bash
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe promote-connector-to-latest
TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-preview.ea013c8" poetry run poe promote-connector-to-latest
```

View File

@@ -28,8 +28,8 @@ def get_docker_hub_auth_token() -> str:
def get_docker_hub_headers() -> Dict | None:
if "DOCKER_HUB_USERNAME" not in os.environ or "DOCKER_HUB_PASSWORD" not in os.environ:
# If the Docker Hub credentials are not provided, we can only anonymously call the Docker Hub API.
if not os.environ.get("DOCKER_HUB_USERNAME") or not os.environ.get("DOCKER_HUB_PASSWORD"):
# If the Docker Hub credentials are not provided (or are empty), we can only anonymously call the Docker Hub API.
# This will only work for public images and lead to a lower rate limit.
return {}
else:

View File

@@ -434,7 +434,7 @@ def generate_and_persist_registry_entry(
bucket_name (str): The name of the GCS bucket.
repo_metadata_file_path (pathlib.Path): The path to the spec file.
registry_type (str): The registry type.
docker_image_tag (str): The docker image tag associated with this release. Typically a semver string (e.g. '1.2.3'), possibly with a suffix (e.g. '1.2.3-dev.abcde12345')
docker_image_tag (str): The docker image tag associated with this release. Typically a semver string (e.g. '1.2.3'), possibly with a suffix (e.g. '1.2.3-preview.abcde12')
is_prerelease (bool): Whether this is a prerelease, or a main release.
"""
# Read the repo metadata dict to bootstrap ourselves. We need the docker repository,
@@ -444,7 +444,7 @@ def generate_and_persist_registry_entry(
try:
# Now that we have the docker repo, read the appropriate versioned metadata from GCS.
# This metadata will differ in a few fields (e.g. in prerelease mode, dockerImageTag will contain the actual prerelease tag `1.2.3-dev.abcde12345`),
# This metadata will differ in a few fields (e.g. in prerelease mode, dockerImageTag will contain the actual prerelease tag `1.2.3-preview.abcde12`),
# so we'll treat this as the source of truth (ish. See below for how we handle the registryOverrides field.)
gcs_client = get_gcs_storage_client(gcs_creds=os.environ.get("GCS_CREDENTIALS"))
bucket = gcs_client.bucket(bucket_name)
@@ -533,7 +533,9 @@ def generate_and_persist_registry_entry(
# For latest versions that are disabled, delete any existing registry entry to remove it from the registry
if (
"-rc" not in metadata_dict["data"]["dockerImageTag"] and "-dev" not in metadata_dict["data"]["dockerImageTag"]
"-rc" not in metadata_dict["data"]["dockerImageTag"]
and "-dev" not in metadata_dict["data"]["dockerImageTag"]
and "-preview" not in metadata_dict["data"]["dockerImageTag"]
) and not metadata_dict["data"]["registryOverrides"][registry_type]["enabled"]:
logger.info(
f"{registry_type} is not enabled: deleting existing {registry_type} registry entry for {metadata_dict['data']['dockerRepository']} at latest path."

View File

@@ -5,7 +5,7 @@ data:
connectorType: source
dockerRepository: airbyte/image-exists-1
githubIssueLabel: source-alloydb-strict-encrypt
dockerImageTag: 2.0.0-dev.cf3628ccf3
dockerImageTag: 2.0.0-preview.cf3628c
documentationUrl: https://docs.airbyte.com/integrations/sources/existingsource
connectorSubtype: database
releaseStage: generally_available

View File

@@ -231,7 +231,7 @@ def test_upload_prerelease(mocker, valid_metadata_yaml_files, tmp_path):
mocker.patch.object(commands.click, "secho")
mocker.patch.object(commands, "upload_metadata_to_gcs")
prerelease_tag = "0.3.0-dev.6d33165120"
prerelease_tag = "0.3.0-preview.6d33165"
bucket = "my-bucket"
metadata_file_path = valid_metadata_yaml_files[0]
validator_opts = ValidatorOptions(docs_path=str(tmp_path), prerelease_tag=prerelease_tag)

View File

@@ -582,7 +582,7 @@ def test_upload_metadata_to_gcs_invalid_docker_images(mocker, invalid_metadata_u
def test_upload_metadata_to_gcs_with_prerelease(mocker, valid_metadata_upload_files, tmp_path):
mocker.spy(gcs_upload, "_file_upload")
mocker.spy(gcs_upload, "upload_file_if_changed")
prerelease_image_tag = "1.5.6-dev.f80318f754"
prerelease_image_tag = "1.5.6-preview.f80318f"
for valid_metadata_upload_file in valid_metadata_upload_files:
tmp_metadata_file_path = tmp_path / "metadata.yaml"
@@ -701,7 +701,7 @@ def test_upload_metadata_to_gcs_release_candidate(mocker, get_fixture_path, tmp_
)
assert metadata.data.releases.rolloutConfiguration.enableProgressiveRollout
prerelease_tag = "1.5.6-dev.f80318f754" if prerelease else None
prerelease_tag = "1.5.6-preview.f80318f" if prerelease else None
upload_info = gcs_upload.upload_metadata_to_gcs(
"my_bucket",

View File

@@ -110,14 +110,14 @@ class PublishConnectorContext(ConnectorContext):
@property
def pre_release_suffix(self) -> str:
return self.git_revision[:10]
return self.git_revision[:7]
@property
def docker_image_tag(self) -> str:
# get the docker image tag from the parent class
metadata_tag = super().docker_image_tag
if self.pre_release:
return f"{metadata_tag}-dev.{self.pre_release_suffix}"
return f"{metadata_tag}-preview.{self.pre_release_suffix}"
else:
return metadata_tag
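# Illustration: with metadata tag "3.17.0" and git revision "ea013c8741...", a pre-release
# build is tagged "3.17.0-preview.ea013c8", while a main release keeps the plain "3.17.0".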

View File

@@ -25,7 +25,7 @@ from pipelines.helpers.utils import raise_if_not_user
from pipelines.models.steps import STEP_PARAMS, Step, StepResult
# Pin the PyAirbyte version to avoid updates from breaking CI
PYAIRBYTE_VERSION = "0.20.2"
PYAIRBYTE_VERSION = "0.35.1"
class PytestStep(Step, ABC):

View File

@@ -156,7 +156,8 @@ class TestPyAirbyteValidationTests:
result = await PyAirbyteValidation(context_for_valid_connector)._run(mocker.MagicMock())
assert isinstance(result, StepResult)
assert result.status == StepStatus.SUCCESS
assert "Getting `spec` output from connector..." in result.stdout
# Verify the connector name appears in output (stable across PyAirbyte versions)
assert context_for_valid_connector.connector.technical_name in (result.stdout + result.stderr)
async def test__run_validation_skip_unpublished_connector(
self,

View File

@@ -1,2 +1,2 @@
cdkVersion=0.1.84
cdkVersion=0.1.89
JunitMethodExecutionTimeout=10m

View File

@@ -2,7 +2,7 @@ data:
connectorSubtype: database
connectorType: destination
definitionId: ce0d828e-1dc4-496c-b122-2da42e637e48
dockerImageTag: 2.1.16-rc.1
dockerImageTag: 2.1.18
dockerRepository: airbyte/destination-clickhouse
githubIssueLabel: destination-clickhouse
icon: clickhouse.svg
@@ -27,7 +27,7 @@ data:
releaseStage: generally_available
releases:
rolloutConfiguration:
enableProgressiveRollout: true
enableProgressiveRollout: false
breakingChanges:
2.0.0:
message: "This connector has been re-written from scratch. Data will now be typed and stored in final (non-raw) tables. The connector may require changes to its configuration to function properly and downstream pipelines may be affected. Warning: SSH tunneling is in Beta."

View File

@@ -7,26 +7,24 @@ package io.airbyte.integrations.destination.clickhouse.client
import com.clickhouse.client.api.Client as ClickHouseClientRaw
import com.clickhouse.client.api.command.CommandResponse
import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader
import com.clickhouse.client.api.metadata.TableSchema
import com.clickhouse.client.api.query.QueryResponse
import com.clickhouse.data.ClickHouseColumn
import com.clickhouse.data.ClickHouseDataType
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnChangeset
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.component.TableColumns
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.component.TableSchema
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAMES
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.DATETIME_WITH_PRECISION
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
import kotlinx.coroutines.future.await
@@ -42,7 +40,6 @@ class ClickhouseAirbyteClient(
private val client: ClickHouseClientRaw,
private val sqlGenerator: ClickhouseSqlGenerator,
private val tempTableNameGenerator: TempTableNameGenerator,
private val clickhouseConfiguration: ClickhouseConfiguration,
) : TableOperationsClient, TableSchemaEvolutionClient {
override suspend fun createNamespace(namespace: String) {
@@ -59,9 +56,8 @@ class ClickhouseAirbyteClient(
) {
execute(
sqlGenerator.createTable(
stream,
tableName,
columnNameMapping,
stream.tableSchema,
replace,
),
)
@@ -81,9 +77,10 @@ class ClickhouseAirbyteClient(
sourceTableName: TableName,
targetTableName: TableName
) {
val columnNames = columnNameMapping.values.toSet()
execute(
sqlGenerator.copyTable(
columnNameMapping,
columnNames,
sourceTableName,
targetTableName,
),
@@ -99,10 +96,8 @@ class ClickhouseAirbyteClient(
throw NotImplementedError("We rely on Clickhouse's table engine for deduping")
}
override suspend fun discoverSchema(
tableName: TableName
): io.airbyte.cdk.load.component.TableSchema {
val tableSchema: TableSchema = client.getTableSchema(tableName.name, tableName.namespace)
override suspend fun discoverSchema(tableName: TableName): TableSchema {
val tableSchema = client.getTableSchema(tableName.name, tableName.namespace)
log.info { "Fetch the clickhouse table schema: $tableSchema" }
@@ -121,7 +116,7 @@ class ClickhouseAirbyteClient(
log.info { "Found Clickhouse columns: $tableSchemaWithoutAirbyteColumns" }
return io.airbyte.cdk.load.component.TableSchema(
return TableSchema(
tableSchemaWithoutAirbyteColumns.associate {
it.columnName to ColumnType(it.dataType.getDataTypeAsString(), it.isNullable)
},
@@ -131,42 +126,8 @@ class ClickhouseAirbyteClient(
override fun computeSchema(
stream: DestinationStream,
columnNameMapping: ColumnNameMapping
): io.airbyte.cdk.load.component.TableSchema {
val importType = stream.importType
val primaryKey =
if (importType is Dedupe) {
sqlGenerator.extractPks(importType.primaryKey, columnNameMapping).toSet()
} else {
emptySet()
}
val cursor =
if (importType is Dedupe) {
if (importType.cursor.size > 1) {
throw ConfigErrorException(
"Only top-level cursors are supported. Got ${importType.cursor}"
)
}
importType.cursor.map { columnNameMapping[it] }.toSet()
} else {
emptySet()
}
return io.airbyte.cdk.load.component.TableSchema(
stream.schema
.asColumns()
.map { (fieldName, fieldType) ->
val clickhouseCompatibleName = columnNameMapping[fieldName]!!
val nullable =
!primaryKey.contains(clickhouseCompatibleName) &&
!cursor.contains(clickhouseCompatibleName)
val type = fieldType.type.toDialectType(clickhouseConfiguration.enableJson)
clickhouseCompatibleName to
ColumnType(
type = type,
nullable = nullable,
)
}
.toMap(),
)
): TableSchema {
return TableSchema(stream.tableSchema.columnSchema.finalSchema)
}
override suspend fun applyChangeset(
@@ -194,7 +155,6 @@ class ClickhouseAirbyteClient(
applyDeduplicationChanges(
stream,
tableName,
columnNameMapping,
columnChangeset,
)
} else if (!columnChangeset.isNoop()) {
@@ -205,42 +165,28 @@ class ClickhouseAirbyteClient(
private suspend fun applyDeduplicationChanges(
stream: DestinationStream,
properTableName: TableName,
columnNameMapping: ColumnNameMapping,
columnChangeset: ColumnChangeset,
) {
val tempTableName = tempTableNameGenerator.generate(properTableName)
execute(sqlGenerator.createNamespace(tempTableName.namespace))
execute(
sqlGenerator.createTable(
stream,
tempTableName,
columnNameMapping,
stream.tableSchema,
true,
),
)
copyIntersectionColumn(
columnChangeset.columnsToChange.keys + columnChangeset.columnsToRetain.keys,
columnNameMapping,
properTableName,
tempTableName
)
execute(sqlGenerator.exchangeTable(tempTableName, properTableName))
execute(sqlGenerator.dropTable(tempTableName))
}
internal suspend fun copyIntersectionColumn(
columnsToCopy: Set<String>,
columnNameMapping: ColumnNameMapping,
properTableName: TableName,
tempTableName: TableName
) {
val columnNames =
columnChangeset.columnsToChange.keys + columnChangeset.columnsToRetain.keys
execute(
sqlGenerator.copyTable(
ColumnNameMapping(columnNameMapping.filter { columnsToCopy.contains(it.value) }),
columnNames,
properTableName,
tempTableName,
),
)
execute(sqlGenerator.exchangeTable(tempTableName, properTableName))
execute(sqlGenerator.dropTable(tempTableName))
}
override suspend fun countTable(tableName: TableName): Long? {
@@ -251,7 +197,7 @@ class ClickhouseAirbyteClient(
reader.next()
val count = reader.getLong("cnt")
return count
} catch (e: Exception) {
} catch (_: Exception) {
return null
}
}
@@ -280,14 +226,18 @@ class ClickhouseAirbyteClient(
}
private fun ClickHouseDataType.getDataTypeAsString(): String {
return if (this.name == "DateTime64") {
return when (this.name) {
"DateTime64" -> {
DATETIME_WITH_PRECISION
} else if (this.name == "Decimal") {
}
"Decimal" -> {
DECIMAL_WITH_PRECISION_AND_SCALE
} else {
}
else -> {
this.name
}
}
}
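// e.g. DateTime64 -> "DateTime64(3)", Decimal -> "Decimal(38, 9)", Int64 -> "Int64".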
override suspend fun namespaceExists(namespace: String): Boolean {
val resp = query("EXISTS DATABASE `$namespace`")

View File

@@ -4,136 +4,62 @@
package io.airbyte.integrations.destination.clickhouse.client
import com.clickhouse.data.ClickHouseDataType
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnChangeset
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
@Singleton
class ClickhouseSqlGenerator(
val clickhouseConfiguration: ClickhouseConfiguration,
) {
class ClickhouseSqlGenerator {
private val log = KotlinLogging.logger {}
/**
* This extension is here to avoid writing `.also { log.info { it }}` for every returned string
* we want to log
*/
private fun String.andLog(): String {
log.info { this }
return this
}
private fun isValidVersionColumnType(airbyteType: AirbyteType): Boolean {
// Must be of an integer type or of type Date/DateTime/DateTime64
return VALID_VERSION_COLUMN_TYPES.any { it.isInstance(airbyteType) }
}
fun createNamespace(namespace: String): String {
return "CREATE DATABASE IF NOT EXISTS `$namespace`;".andLog()
}
fun createTable(
stream: DestinationStream,
tableName: TableName,
columnNameMapping: ColumnNameMapping,
tableSchema: StreamTableSchema,
replace: Boolean,
): String {
val pks: List<String> =
when (stream.importType) {
is Dedupe -> extractPks((stream.importType as Dedupe).primaryKey, columnNameMapping)
else -> listOf()
}
// For ReplacingMergeTree, we need to make the cursor column non-nullable if it's used as
// version column. We'll also determine here if we need to fall back to extracted_at.
var useCursorAsVersionColumn = false
val nonNullableColumns =
mutableSetOf<String>().apply {
addAll(pks) // Primary keys are always non-nullable
if (stream.importType is Dedupe) {
val dedupeType = stream.importType as Dedupe
if (dedupeType.cursor.isNotEmpty()) {
val cursorFieldName = dedupeType.cursor.first()
val cursorColumnName = columnNameMapping[cursorFieldName] ?: cursorFieldName
// Check if the cursor column type is valid for ClickHouse
// ReplacingMergeTree
val cursorColumnType = stream.schema.asColumns()[cursorFieldName]?.type
if (
cursorColumnType != null && isValidVersionColumnType(cursorColumnType)
) {
// Cursor column is valid, use it as version column
add(cursorColumnName) // Make cursor column non-nullable too
useCursorAsVersionColumn = true
} else {
// Cursor column is invalid, we'll fall back to _airbyte_extracted_at
log.warn {
"Cursor column '$cursorFieldName' for stream '${stream.mappedDescriptor}' has type '${cursorColumnType?.let { it::class.simpleName }}' which is not valid for use as a version column in ClickHouse ReplacingMergeTree. " +
"Falling back to using _airbyte_extracted_at as version column. Valid types are: Integer, Date, Timestamp."
}
useCursorAsVersionColumn = false
}
}
// If no cursor is specified or cursor is invalid, we'll use
// _airbyte_extracted_at
// as version column, which is already non-nullable by default (defined in
// CREATE TABLE statement)
}
}
val columnDeclarations =
columnsAndTypes(stream, columnNameMapping, nonNullableColumns.toList())
val forceCreateTable = if (replace) "OR REPLACE" else ""
val pksAsString =
val finalSchema = tableSchema.columnSchema.finalSchema
val columnDeclarations =
finalSchema
.map { (columnName, columnType) -> "`$columnName` ${columnType.typeDecl()}" }
.joinToString(",\n")
val orderBy =
if (tableSchema.importType !is Dedupe) {
COLUMN_NAME_AB_RAW_ID
} else {
val pks = flattenPks(tableSchema.getPrimaryKey())
pks.joinToString(",") {
// Escape the columns
"`$it`"
}
}
val engine =
when (stream.importType) {
when (tableSchema.importType) {
is Dedupe -> {
val dedupeType = stream.importType as Dedupe
// Use cursor column as version column for ReplacingMergeTree if available and
// valid
// Check if cursor column type is valid for ClickHouse ReplacingMergeTree
val cursor = tableSchema.getCursor().firstOrNull()
val cursorType = cursor?.let { finalSchema[it]?.type }
val useCursorAsVersion =
cursorType != null && isValidVersionColumn(cursor, cursorType)
val versionColumn =
if (dedupeType.cursor.isNotEmpty() && useCursorAsVersionColumn) {
val cursorFieldName = dedupeType.cursor.first()
val cursorColumnName =
columnNameMapping[cursorFieldName] ?: cursorFieldName
"`$cursorColumnName`"
if (useCursorAsVersion) {
"`$cursor`"
} else {
// Fallback to _airbyte_extracted_at if no cursor is specified or cursor
// is invalid
@@ -152,33 +78,13 @@ class ClickhouseSqlGenerator(
$COLUMN_NAME_AB_GENERATION_ID UInt32 NOT NULL,
$columnDeclarations
)
ENGINE = ${engine}
ORDER BY (${if (pks.isEmpty()) {
"$COLUMN_NAME_AB_RAW_ID"
} else {
pksAsString
}})
ENGINE = $engine
ORDER BY ($orderBy)
"""
.trimIndent()
.andLog()
}
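// Illustration (hedged): for a Dedupe stream with primary key `id` and a DateTime64(3)
// cursor `updated_at`, this yields a ReplacingMergeTree table versioned by `updated_at`
// with ORDER BY (`id`); for Append streams the table is simply ordered by _airbyte_raw_id.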
internal fun extractPks(
primaryKey: List<List<String>>,
columnNameMapping: ColumnNameMapping
): List<String> {
return primaryKey.map { fieldPath ->
if (fieldPath.size != 1) {
throw UnsupportedOperationException(
"Only top-level primary keys are supported, got $fieldPath",
)
}
val fieldName = fieldPath.first()
val columnName = columnNameMapping[fieldName] ?: fieldName
columnName
}
}
fun dropTable(tableName: TableName): String =
"DROP TABLE IF EXISTS `${tableName.namespace}`.`${tableName.name}`;".andLog()
@@ -191,11 +97,11 @@ class ClickhouseSqlGenerator(
.andLog()
fun copyTable(
columnNameMapping: ColumnNameMapping,
columnNames: Set<String>,
sourceTableName: TableName,
targetTableName: TableName,
): String {
val columnNames = columnNameMapping.map { (_, actualName) -> actualName }.joinToString(",")
val joinedNames = columnNames.joinToString(",")
// TODO can we use CDK builtin stuff instead of hardcoding the airbyte meta columns?
return """
INSERT INTO `${targetTableName.namespace}`.`${targetTableName.name}`
@@ -204,79 +110,20 @@ class ClickhouseSqlGenerator(
$COLUMN_NAME_AB_EXTRACTED_AT,
$COLUMN_NAME_AB_META,
$COLUMN_NAME_AB_GENERATION_ID,
$columnNames
$joinedNames
)
SELECT
$COLUMN_NAME_AB_RAW_ID,
$COLUMN_NAME_AB_EXTRACTED_AT,
$COLUMN_NAME_AB_META,
$COLUMN_NAME_AB_GENERATION_ID,
$columnNames
$joinedNames
FROM `${sourceTableName.namespace}`.`${sourceTableName.name}`
"""
.trimIndent()
.andLog()
}
/**
* A SQL SELECT statement that extracts records from the table and dedupes the records (since we
* only need the most-recent record to upsert).
*/
private fun selectDedupedRecords(
stream: DestinationStream,
sourceTableName: TableName,
columnNameMapping: ColumnNameMapping,
): String {
val columnList: String =
stream.schema.asColumns().keys.joinToString("\n") { fieldName ->
val columnName = columnNameMapping[fieldName]!!
"`$columnName`,"
}
val importType = stream.importType as Dedupe
// We need to dedupe the records. Note the row_number() invocation in
// the SQL statement. We only take the most-recent raw record for each PK.
val pkList =
importType.primaryKey.joinToString(",") { fieldName ->
val columnName = columnNameMapping[fieldName.first()]!!
"`$columnName`"
}
val cursorOrderClause =
if (importType.cursor.isEmpty()) {
""
} else if (importType.cursor.size == 1) {
val columnName = columnNameMapping[importType.cursor.first()]!!
"`$columnName` DESC NULLS LAST,"
} else {
throw UnsupportedOperationException(
"Only top-level cursors are supported, got ${importType.cursor}",
)
}
return """
WITH records AS (
SELECT
$columnList
$COLUMN_NAME_AB_META,
$COLUMN_NAME_AB_RAW_ID,
$COLUMN_NAME_AB_EXTRACTED_AT,
$COLUMN_NAME_AB_GENERATION_ID
FROM `${sourceTableName.namespace}`.`${sourceTableName.name}`
), numbered_rows AS (
SELECT *, row_number() OVER (
PARTITION BY $pkList ORDER BY $cursorOrderClause `$COLUMN_NAME_AB_EXTRACTED_AT` DESC
) AS row_number
FROM records
)
SELECT $columnList $COLUMN_NAME_AB_META, $COLUMN_NAME_AB_RAW_ID, $COLUMN_NAME_AB_EXTRACTED_AT, $COLUMN_NAME_AB_GENERATION_ID
FROM numbered_rows
WHERE row_number = 1
"""
.trimIndent()
.andLog()
}
fun countTable(
tableName: TableName,
alias: String = "",
@@ -297,21 +144,6 @@ class ClickhouseSqlGenerator(
.trimIndent()
.andLog()
private fun columnsAndTypes(
stream: DestinationStream,
columnNameMapping: ColumnNameMapping,
nonNullableColumns: List<String>,
): String {
return stream.schema
.asColumns()
.map { (fieldName, type) ->
val columnName = columnNameMapping[fieldName]!!
val typeName = type.type.toDialectType(clickhouseConfiguration.enableJson)
"`$columnName` ${typeDecl(typeName, !nonNullableColumns.contains(columnName))}"
}
.joinToString(",\n")
}
fun alterTable(alterationSummary: ColumnChangeset, tableName: TableName): String {
val builder =
StringBuilder()
@@ -330,53 +162,36 @@ class ClickhouseSqlGenerator(
return builder.dropLast(1).toString().andLog()
}
companion object {
const val DATETIME_WITH_PRECISION = "DateTime64(3)"
const val DECIMAL_WITH_PRECISION_AND_SCALE = "Decimal(38, 9)"
private val VALID_VERSION_COLUMN_TYPES =
setOf(
IntegerType::class,
DateType::class,
TimestampTypeWithTimezone::class,
TimestampTypeWithoutTimezone::class,
)
}
}
fun String.sqlNullable(): String = "Nullable($this)"
fun AirbyteType.toDialectType(enableJson: Boolean): String =
when (this) {
BooleanType -> ClickHouseDataType.Bool.name
DateType -> ClickHouseDataType.Date32.name
IntegerType -> ClickHouseDataType.Int64.name
NumberType -> DECIMAL_WITH_PRECISION_AND_SCALE
StringType -> ClickHouseDataType.String.name
TimeTypeWithTimezone -> ClickHouseDataType.String.name
TimeTypeWithoutTimezone -> ClickHouseDataType.String.name
TimestampTypeWithTimezone,
TimestampTypeWithoutTimezone -> DATETIME_WITH_PRECISION
is ArrayType,
ArrayTypeWithoutSchema,
is UnionType,
is UnknownType -> ClickHouseDataType.String.name
ObjectTypeWithEmptySchema,
ObjectTypeWithoutSchema,
is ObjectType -> {
if (enableJson) {
ClickHouseDataType.JSON.name
} else {
ClickHouseDataType.String.name
}
}
}
fun typeDecl(type: String, nullable: Boolean) =
fun ColumnType.typeDecl() =
if (nullable) {
type.sqlNullable()
"Nullable($type)"
} else {
type
}
fun ColumnType.typeDecl() = typeDecl(this.type, this.nullable)
/**
* TODO: this is really a schema validation function and should probably run on startup long
* before we go to create a table.
*/
internal fun flattenPks(
primaryKey: List<List<String>>,
): List<String> {
return primaryKey.map { fieldPath ->
if (fieldPath.size != 1) {
throw UnsupportedOperationException(
"Only top-level primary keys are supported, got $fieldPath",
)
}
fieldPath.first()
}
}
/**
* This extension is here to avoid writing `.also { log.info { it } }` for every returned string
* we want to log.
*/
private fun String.andLog(): String {
log.info { this }
return this
}
}

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.clickhouse.client
import io.airbyte.cdk.load.table.CDC_CURSOR_COLUMN
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.VALID_VERSION_COLUMN_TYPES
object ClickhouseSqlTypes {
const val DATETIME_WITH_PRECISION = "DateTime64(3)"
const val DECIMAL_WITH_PRECISION_AND_SCALE = "Decimal(38, 9)"
const val BOOL = "Bool"
const val DATE32 = "Date32"
const val INT64 = "Int64"
const val STRING = "String"
const val JSON = "JSON"
val VALID_VERSION_COLUMN_TYPES =
setOf(
INT64,
DATE32,
DATETIME_WITH_PRECISION,
)
}
// Warning: if any munging changes the name of the CDC column, this will break.
// Currently, that is not the case.
fun isValidVersionColumn(name: String, type: String) =
// CDC cursors cannot be used as a version column since they are null
// during the initial CDC snapshot.
name != CDC_CURSOR_COLUMN && VALID_VERSION_COLUMN_TYPES.contains(type)
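// A minimal usage sketch with assumed column names (not part of this change): the version column
// used for ClickHouse's ReplacingMergeTree must have one of the types above and must not be the
// CDC cursor, which is null during the initial CDC snapshot.
//
// isValidVersionColumn("updated_at", ClickhouseSqlTypes.DATETIME_WITH_PRECISION) // true
// isValidVersionColumn("name", ClickhouseSqlTypes.STRING) // false: String is not a valid version type
// isValidVersionColumn(CDC_CURSOR_COLUMN, ClickhouseSqlTypes.DATETIME_WITH_PRECISION) // false: CDC cursor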

View File

@@ -1,62 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.clickhouse.config
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameGenerator
import io.airbyte.cdk.load.table.FinalTableNameGenerator
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import jakarta.inject.Singleton
import java.util.Locale
import java.util.UUID
@Singleton
class ClickhouseFinalTableNameGenerator(private val config: ClickhouseConfiguration) :
FinalTableNameGenerator {
override fun getTableName(streamDescriptor: DestinationStream.Descriptor) =
TableName(
namespace =
(streamDescriptor.namespace ?: config.resolvedDatabase)
.toClickHouseCompatibleName(),
name = streamDescriptor.name.toClickHouseCompatibleName(),
)
}
@Singleton
class ClickhouseColumnNameGenerator : ColumnNameGenerator {
override fun getColumnName(column: String): ColumnNameGenerator.ColumnName {
return ColumnNameGenerator.ColumnName(
column.toClickHouseCompatibleName(),
column.lowercase(Locale.getDefault()).toClickHouseCompatibleName(),
)
}
}
/**
* Transforms a string to be compatible with ClickHouse table and column names.
*
* @return The transformed string suitable for ClickHouse identifiers.
*/
fun String.toClickHouseCompatibleName(): String {
// 1. Replace any character that is not a letter,
// a digit (0-9), or an underscore (_) with a single underscore.
var transformed = toAlphanumericAndUnderscore(this)
// 2. Ensure the identifier does not start with a digit.
// If it starts with a digit, prepend an underscore.
if (transformed.isNotEmpty() && transformed[0].isDigit()) {
transformed = "_$transformed"
}
// 3. Do not allow empty strings.
if (transformed.isEmpty()) {
return "default_name_${UUID.randomUUID()}" // A fallback name if the input results in an
// empty string
}
return transformed
}

View File

@@ -0,0 +1,33 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.clickhouse.schema
import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
import java.util.UUID
/**
* Transforms a string to be compatible with ClickHouse table and column names.
*
* @return The transformed string suitable for ClickHouse identifiers.
*/
fun String.toClickHouseCompatibleName(): String {
// 1. Replace any character that is not a letter,
// a digit (0-9), or an underscore (_) with a single underscore.
var transformed = toAlphanumericAndUnderscore(this)
// 2. Do not allow empty strings.
if (transformed.isEmpty()) {
return "default_name_${UUID.randomUUID()}" // A fallback name if the input results in an
// empty string
}
// 3. Ensure the identifier does not start with a digit.
// If it starts with a digit, prepend an underscore.
if (transformed[0].isDigit()) {
transformed = "_$transformed"
}
return transformed
}
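// Illustrative examples of the rules above (the first case also appears in ClickhouseNamingUtilsTest
// further down; the UUID suffix is shown schematically):
//
// "hello world".toClickHouseCompatibleName() // "hello_world"
// "123stream".toClickHouseCompatibleName()   // "_123stream" (leading digit gets an underscore prefix)
// "".toClickHouseCompatibleName()            // "default_name_<random-uuid>"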

View File

@@ -4,6 +4,7 @@
package io.airbyte.integrations.destination.clickhouse.schema
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.ArrayType
@@ -24,11 +25,11 @@ import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes
import io.airbyte.integrations.destination.clickhouse.client.isValidVersionColumn
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import jakarta.inject.Singleton
@@ -55,30 +56,66 @@ class ClickhouseTableSchemaMapper(
// Map Airbyte field types to ClickHouse column types
val clickhouseType =
when (fieldType.type) {
BooleanType -> "Bool"
DateType -> "Date32"
IntegerType -> "Int64"
NumberType -> DECIMAL_WITH_PRECISION_AND_SCALE
StringType -> "String"
TimeTypeWithTimezone -> "String"
TimeTypeWithoutTimezone -> "String"
BooleanType -> ClickhouseSqlTypes.BOOL
DateType -> ClickhouseSqlTypes.DATE32
IntegerType -> ClickhouseSqlTypes.INT64
NumberType -> ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE
StringType -> ClickhouseSqlTypes.STRING
TimeTypeWithTimezone -> ClickhouseSqlTypes.STRING
TimeTypeWithoutTimezone -> ClickhouseSqlTypes.STRING
TimestampTypeWithTimezone,
TimestampTypeWithoutTimezone -> DATETIME_WITH_PRECISION
TimestampTypeWithoutTimezone -> ClickhouseSqlTypes.DATETIME_WITH_PRECISION
is ArrayType,
ArrayTypeWithoutSchema,
is UnionType,
is UnknownType -> "String"
is UnknownType -> ClickhouseSqlTypes.STRING
ObjectTypeWithEmptySchema,
ObjectTypeWithoutSchema,
is ObjectType -> {
if (config.enableJson) {
"JSON"
ClickhouseSqlTypes.JSON
} else {
"String"
ClickhouseSqlTypes.STRING
}
}
}
return ColumnType(clickhouseType, fieldType.nullable)
}
override fun toFinalSchema(tableSchema: StreamTableSchema): StreamTableSchema {
if (tableSchema.importType !is Dedupe) {
return tableSchema
}
// For dedupe mode we do extra logic to ensure certain columns are non-null:
// 1) the primary key columns
// 2) the version column used by the dedupe engine (in practice the cursor)
val pks = tableSchema.getPrimaryKey().flatten()
val cursor = tableSchema.getCursor().firstOrNull()
val nonNullCols = buildSet {
addAll(pks) // Primary keys are always non-nullable
if (cursor != null) {
// Check if the cursor column type is valid for ClickHouse ReplacingMergeTree
val cursorColumnType = tableSchema.columnSchema.finalSchema[cursor]!!.type
if (isValidVersionColumn(cursor, cursorColumnType)) {
// Cursor column is valid, use it as version column
add(cursor) // Make cursor column non-nullable too
}
}
}
val finalSchema =
tableSchema.columnSchema.finalSchema
.map {
it.key to
it.value.copy(nullable = it.value.nullable && !nonNullCols.contains(it.key))
}
.toMap()
return tableSchema.copy(
columnSchema = tableSchema.columnSchema.copy(finalSchema = finalSchema)
)
}
}
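// Sketch of toFinalSchema's effect with hypothetical columns (not from this diff): for a Dedupe
// stream with primary key [["id"]] and cursor ["updated_at"], where updated_at is a DateTime64(3)
// column, the nullable flags flip while every other column keeps its original nullability:
//
// before: "id" to ColumnType(ClickhouseSqlTypes.INT64, true),  "updated_at" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, true)
// after:  "id" to ColumnType(ClickhouseSqlTypes.INT64, false), "updated_at" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, false)
//
// Append streams are returned unchanged, and a cursor that fails isValidVersionColumn keeps its
// nullability (only the primary key columns are forced non-null in that case).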

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.clickhouse.component
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.NEGATIVE_HIGH_PRECISION_FLOAT
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.POSITIVE_HIGH_PRECISION_FLOAT
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_NEGATIVE_FLOAT32
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_NEGATIVE_FLOAT64
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_POSITIVE_FLOAT32
import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_POSITIVE_FLOAT64
import io.airbyte.cdk.load.component.DataCoercionSuite
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.component.TestTableOperationsClient
import io.airbyte.cdk.load.component.toArgs
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
@MicronautTest(environments = ["component"], resolveParameters = false)
class ClickhouseDataCoercionTest(
override val coercer: ValueCoercer,
override val opsClient: TableOperationsClient,
override val testClient: TestTableOperationsClient,
override val schemaFactory: TableSchemaFactory,
) : DataCoercionSuite {
@ParameterizedTest
// We use clickhouse's Int64 type for integers
@MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int64")
override fun `handle integer values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) {
super.`handle integer values`(inputValue, expectedValue, expectedChangeReason)
}
@ParameterizedTest
@MethodSource(
"io.airbyte.integrations.destination.clickhouse.component.ClickhouseDataCoercionTest#numbers"
)
override fun `handle number values`(
inputValue: AirbyteValue,
expectedValue: Any?,
expectedChangeReason: Reason?
) {
super.`handle number values`(inputValue, expectedValue, expectedChangeReason)
}
companion object {
/**
* destination-clickhouse doesn't set a change reason when truncating high-precision numbers
* (https://github.com/airbytehq/airbyte-internal-issues/issues/15401)
*/
@JvmStatic
fun numbers() =
DataCoercionNumberFixtures.numeric38_9
.map {
when (it.name) {
POSITIVE_HIGH_PRECISION_FLOAT,
NEGATIVE_HIGH_PRECISION_FLOAT,
SMALLEST_POSITIVE_FLOAT32,
SMALLEST_NEGATIVE_FLOAT32,
SMALLEST_POSITIVE_FLOAT64,
SMALLEST_NEGATIVE_FLOAT64 -> it.copy(changeReason = null)
else -> it
}
}
.toArgs()
}
}

View File

@@ -6,6 +6,7 @@ package io.airbyte.integrations.destination.clickhouse.component
import io.airbyte.cdk.load.component.TableOperationsSuite
import io.airbyte.cdk.load.component.TestTableOperationsClient
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseAirbyteClient
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
@@ -15,6 +16,7 @@ import org.junit.jupiter.api.Test
class ClickhouseTableOperationsTest : TableOperationsSuite {
@Inject override lateinit var client: ClickhouseAirbyteClient
@Inject override lateinit var testClient: TestTableOperationsClient
@Inject override lateinit var schemaFactory: TableSchemaFactory
@Test
override fun `connect to database`() {

View File

@@ -12,6 +12,8 @@ import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
import io.airbyte.cdk.load.component.TableSchemaEvolutionFixtures
import io.airbyte.cdk.load.component.TableSchemaEvolutionSuite
import io.airbyte.cdk.load.component.TestTableOperationsClient
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import org.junit.jupiter.api.Test
@@ -19,24 +21,25 @@ import org.junit.jupiter.api.Test
class ClickhouseTableSchemaEvolutionTest(
override val client: TableSchemaEvolutionClient,
override val opsClient: TableOperationsClient,
override val testClient: TestTableOperationsClient
override val testClient: TestTableOperationsClient,
override val schemaFactory: TableSchemaFactory,
) : TableSchemaEvolutionSuite {
private val allTypesTableSchema =
TableSchema(
mapOf(
"string" to ColumnType("String", true),
"boolean" to ColumnType("Bool", true),
"integer" to ColumnType("Int64", true),
"number" to ColumnType("Decimal(38, 9)", true),
"date" to ColumnType("Date32", true),
"timestamp_tz" to ColumnType("DateTime64(3)", true),
"timestamp_ntz" to ColumnType("DateTime64(3)", true),
"time_tz" to ColumnType("String", true),
"time_ntz" to ColumnType("String", true),
"string" to ColumnType(ClickhouseSqlTypes.STRING, true),
"boolean" to ColumnType(ClickhouseSqlTypes.BOOL, true),
"integer" to ColumnType(ClickhouseSqlTypes.INT64, true),
"number" to ColumnType(ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE, true),
"date" to ColumnType(ClickhouseSqlTypes.DATE32, true),
"timestamp_tz" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, true),
"timestamp_ntz" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, true),
"time_tz" to ColumnType(ClickhouseSqlTypes.STRING, true),
"time_ntz" to ColumnType(ClickhouseSqlTypes.STRING, true),
// yes, these three are different
"array" to ColumnType("String", true),
"object" to ColumnType("JSON", true),
"unknown" to ColumnType("String", true),
"array" to ColumnType(ClickhouseSqlTypes.STRING, true),
"object" to ColumnType(ClickhouseSqlTypes.JSON, true),
"unknown" to ColumnType(ClickhouseSqlTypes.STRING, true),
)
)

View File

@@ -16,7 +16,7 @@ import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.test.util.ExpectedRecordMapper
import io.airbyte.cdk.load.test.util.OutputRecord
import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
import io.airbyte.integrations.destination.clickhouse.schema.toClickHouseCompatibleName
import java.math.RoundingMode
import java.time.LocalTime
import java.time.ZoneOffset

View File

@@ -30,8 +30,8 @@ import io.airbyte.cdk.load.write.UnknownTypesBehavior
import io.airbyte.integrations.destination.clickhouse.ClickhouseConfigUpdater
import io.airbyte.integrations.destination.clickhouse.ClickhouseContainerHelper
import io.airbyte.integrations.destination.clickhouse.Utils
import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
import io.airbyte.integrations.destination.clickhouse.fixtures.ClickhouseExpectedRecordMapper
import io.airbyte.integrations.destination.clickhouse.schema.toClickHouseCompatibleName
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfigurationFactory
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseSpecificationOss

View File

@@ -17,11 +17,10 @@ import io.airbyte.cdk.load.component.TableSchema
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.integrations.destination.clickhouse.config.ClickhouseFinalTableNameGenerator
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import io.mockk.coEvery
import io.mockk.coVerify
import io.mockk.coVerifyOrder
@@ -39,10 +38,7 @@ class ClickhouseAirbyteClientTest {
// Mocks
private val client: ClickHouseClientRaw = mockk(relaxed = true)
private val clickhouseSqlGenerator: ClickhouseSqlGenerator = mockk(relaxed = true)
private val clickhouseFinalTableNameGenerator: ClickhouseFinalTableNameGenerator =
mockk(relaxed = true)
private val tempTableNameGenerator: TempTableNameGenerator = mockk(relaxed = true)
private val clickhouseConfiguration: ClickhouseConfiguration = mockk(relaxed = true)
// Client
private val clickhouseAirbyteClient =
@@ -51,7 +47,6 @@ class ClickhouseAirbyteClientTest {
client,
clickhouseSqlGenerator,
tempTableNameGenerator,
clickhouseConfiguration
)
)
@@ -107,7 +102,6 @@ class ClickhouseAirbyteClientTest {
alterTableStatement
coEvery { clickhouseAirbyteClient.execute(alterTableStatement) } returns
mockk(relaxed = true)
every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns mockTableName
mockCHSchemaWithAirbyteColumns()
@@ -125,6 +119,16 @@ class ClickhouseAirbyteClientTest {
every { asColumns() } returns LinkedHashMap.newLinkedHashMap(0)
}
every { importType } returns Append
every { tableSchema } returns
mockk(relaxed = true) {
every { columnSchema } returns
mockk(relaxed = true) {
every { inputSchema } returns LinkedHashMap.newLinkedHashMap(0)
every { inputToFinalColumnNames } returns emptyMap()
}
every { getPrimaryKey() } returns emptyList()
every { getCursor() } returns emptyList()
}
}
clickhouseAirbyteClient.applyChangeset(
stream,
@@ -164,11 +168,20 @@ class ClickhouseAirbyteClientTest {
coEvery { clickhouseAirbyteClient.execute(any()) } returns mockk(relaxed = true)
every { tempTableNameGenerator.generate(any()) } returns tempTableName
every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns finalTableName
mockCHSchemaWithAirbyteColumns()
val columnMapping = ColumnNameMapping(mapOf())
val tableSchema1: StreamTableSchema =
mockk(relaxed = true) {
every { columnSchema } returns
mockk(relaxed = true) {
every { inputSchema } returns LinkedHashMap.newLinkedHashMap(0)
every { inputToFinalColumnNames } returns emptyMap()
}
every { getPrimaryKey() } returns emptyList()
every { getCursor() } returns emptyList()
}
val stream =
mockk<DestinationStream> {
every { mappedDescriptor } returns
@@ -182,6 +195,7 @@ class ClickhouseAirbyteClientTest {
every { asColumns() } returns LinkedHashMap.newLinkedHashMap(0)
}
every { importType } returns Append
every { tableSchema } returns tableSchema1
}
clickhouseAirbyteClient.applyChangeset(
stream,
@@ -195,8 +209,8 @@ class ClickhouseAirbyteClientTest {
coVerifyOrder {
clickhouseSqlGenerator.createNamespace(tempTableName.namespace)
clickhouseSqlGenerator.createTable(stream, tempTableName, columnMapping, true)
clickhouseSqlGenerator.copyTable(columnMapping, finalTableName, tempTableName)
clickhouseSqlGenerator.createTable(tempTableName, tableSchema1, true)
clickhouseSqlGenerator.copyTable(setOf("something"), finalTableName, tempTableName)
clickhouseSqlGenerator.exchangeTable(tempTableName, finalTableName)
clickhouseSqlGenerator.dropTable(tempTableName)
}
@@ -207,8 +221,6 @@ class ClickhouseAirbyteClientTest {
fun `test ensure schema matches fails if no airbyte columns`() = runTest {
val finalTableName = TableName("fin", "al")
every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns finalTableName
val columnMapping = ColumnNameMapping(mapOf())
val stream =
mockk<DestinationStream> {
@@ -266,6 +278,19 @@ class ClickhouseAirbyteClientTest {
every { asColumns() } returns columns
}
every { importType } returns Append
every { tableSchema } returns
mockk(relaxed = true) {
every { columnSchema } returns
mockk(relaxed = true) {
every { inputSchema } returns columns
every { inputToFinalColumnNames } returns
mapOf("field 1" to "field_1")
every { finalSchema } returns
mapOf("field_1" to ColumnType("String", true))
}
every { getPrimaryKey() } returns emptyList()
every { getCursor() } returns emptyList()
}
}
val columnMapping = ColumnNameMapping(mapOf("field 1" to "field_1"))
@@ -280,35 +305,6 @@ class ClickhouseAirbyteClientTest {
Assertions.assertEquals(expected, actual)
}
@Test
fun `test copyIntersectionColumn`() = runTest {
val columnsToCopy =
setOf(
"column1",
"column2",
)
val columnNameMapping = ColumnNameMapping(mapOf("2" to "column2", "3" to "column3"))
val properTableName = TableName("table", "name")
val tempTableName = TableName("table", "tmp")
coEvery { clickhouseAirbyteClient.execute(any()) } returns mockk()
clickhouseAirbyteClient.copyIntersectionColumn(
columnsToCopy,
columnNameMapping,
properTableName,
tempTableName,
)
verify {
clickhouseSqlGenerator.copyTable(
ColumnNameMapping(mapOf("2" to "column2")),
properTableName,
tempTableName,
)
}
}
companion object {
// Constants
private const val DUMMY_SENTENCE = "SELECT 1"

View File

@@ -10,9 +10,6 @@ import io.airbyte.cdk.load.component.ColumnChangeset
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.component.ColumnTypeChange
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
import io.mockk.mockk
import kotlin.test.assertTrue
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@@ -23,9 +20,7 @@ import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource
class ClickhouseSqlGeneratorTest {
private val clickhouseConfiguration: ClickhouseConfiguration = mockk(relaxed = true)
private val clickhouseSqlGenerator = ClickhouseSqlGenerator(clickhouseConfiguration)
private val clickhouseSqlGenerator = ClickhouseSqlGenerator()
@Test
fun testCreateNamespace() {
@@ -90,52 +85,35 @@ class ClickhouseSqlGeneratorTest {
}
}
@Test
fun `test extractPks with single primary key`() {
val primaryKey = listOf(listOf("id"))
val columnNameMapping = ColumnNameMapping(mapOf("id" to "id_column"))
val expected = listOf("id_column")
val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
Assertions.assertEquals(expected, actual)
}
@Test
fun `test extractPks with multiple primary keys`() {
val primaryKey = listOf(listOf("id"), listOf("name"))
val columnNameMapping =
ColumnNameMapping(mapOf("id" to "id_column", "name" to "name_column"))
val expected = listOf("id_column", "name_column")
val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
val expected = listOf("id", "name")
val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
Assertions.assertEquals(expected, actual)
}
@Test
fun `test extractPks with empty primary key list`() {
fun `test flattenPks with empty primary key list`() {
val primaryKey = emptyList<List<String>>()
val columnNameMapping = ColumnNameMapping(emptyMap<String, String>())
val expected = listOf<String>()
val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
Assertions.assertEquals(expected, actual)
}
@Test
fun `test extractPks without column mapping`() {
fun `test extractPks with single primary key`() {
val primaryKey = listOf(listOf("id"))
val columnNameMapping = ColumnNameMapping(mapOf())
val expected = listOf("id")
val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
Assertions.assertEquals(expected, actual)
}
@Test
fun `test extractPks with nested primary key`() {
fun `test flattenPks with nested primary key`() {
val primaryKey = listOf(listOf("user", "id"))
val columnNameMapping =
ColumnNameMapping(
mapOf("user.id" to "user_id_column")
) // This mapping is not used but here for completeness.
assertThrows<UnsupportedOperationException> {
clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
clickhouseSqlGenerator.flattenPks(primaryKey)
}
}
@@ -157,8 +135,7 @@ class ClickhouseSqlGeneratorTest {
fun `test copyTable`() {
val sourceTable = TableName("source_namespace", "source_table")
val targetTable = TableName("target_namespace", "target_table")
val columnNameMapping =
ColumnNameMapping(mapOf("source_col1" to "target_col1", "source_col2" to "target_col2"))
val columnNames = setOf("target_col1", "target_col2")
val expectedSql =
"""
@@ -179,8 +156,7 @@ class ClickhouseSqlGeneratorTest {
FROM `source_namespace`.`source_table`
""".trimIndent()
val actualSql =
clickhouseSqlGenerator.copyTable(columnNameMapping, sourceTable, targetTable)
val actualSql = clickhouseSqlGenerator.copyTable(columnNames, sourceTable, targetTable)
Assertions.assertEquals(expectedSql, actualSql)
}

View File

@@ -2,13 +2,13 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.clickhouse.config
package io.airbyte.integrations.destination.clickhouse.schema
import java.util.UUID
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
class ClickhouseNameGeneratorTest {
class ClickhouseNamingUtilsTest {
@Test
fun `toClickHouseCompatibleName replaces special characters with underscores`() {
Assertions.assertEquals("hello_world", "hello world".toClickHouseCompatibleName())

View File

@@ -1,4 +1,4 @@
cdkVersion=0.1.83
cdkVersion=0.1.86
# our testcontainer has issues with too much concurrency.
# 4 threads seems to be the sweet spot.
testExecutionConcurrency=4

View File

@@ -6,7 +6,7 @@ data:
connectorSubtype: database
connectorType: destination
definitionId: 25c5221d-dce2-4163-ade9-739ef790f503
dockerImageTag: 3.0.4
dockerImageTag: 3.0.5
dockerRepository: airbyte/destination-postgres
documentationUrl: https://docs.airbyte.com/integrations/destinations/postgres
githubIssueLabel: destination-postgres

View File

@@ -14,8 +14,11 @@ import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.airbyte.integrations.destination.postgres.write.load.PostgresInsertBuffer
@@ -51,19 +54,35 @@ class PostgresOssChecker(
"_airbyte_connection_test_${
UUID.randomUUID().toString().replace("-".toRegex(), "")}"
val qualifiedTableName = TableName(namespace = outputSchema, name = tableName)
val tempTableName = TableName(namespace = outputSchema, name = "${tableName}_tmp")
val checkSchema =
ObjectType(linkedMapOf(CHECK_COLUMN_NAME to FieldType(StringType, nullable = false)))
val destinationStream =
DestinationStream(
unmappedNamespace = outputSchema,
unmappedName = tableName,
importType = Append,
schema =
ObjectType(
linkedMapOf(CHECK_COLUMN_NAME to FieldType(StringType, nullable = false))
),
schema = checkSchema,
generationId = 0L,
minimumGenerationId = 0L,
syncId = 0L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(
finalTableName = qualifiedTableName,
tempTableName = tempTableName,
),
columnSchema =
ColumnSchema(
inputSchema = checkSchema.properties,
inputToFinalColumnNames =
mapOf(CHECK_COLUMN_NAME to CHECK_COLUMN_NAME),
finalSchema = emptyMap(),
),
importType = Append,
),
)
runBlocking {
try {

View File

@@ -4,15 +4,19 @@
package io.airbyte.integrations.destination.postgres.client
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
import io.airbyte.cdk.ConfigErrorException
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnChangeset
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.component.TableColumns
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.component.TableSchema
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAMES
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.airbyte.integrations.destination.postgres.sql.COUNT_TOTAL_ALIAS
import io.airbyte.integrations.destination.postgres.sql.Column
@@ -26,6 +30,11 @@ import javax.sql.DataSource
private val log = KotlinLogging.logger {}
@Singleton
@SuppressFBWarnings(
value = ["SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"],
justification =
"There is little chance of SQL injection. There is also little need for statement reuse. The basic statement is more readable than the prepared statement."
)
class PostgresAirbyteClient(
private val dataSource: DataSource,
private val sqlGenerator: PostgresDirectLoadSqlGenerator,
@@ -53,6 +62,29 @@ class PostgresAirbyteClient(
null
}
override suspend fun namespaceExists(namespace: String): Boolean {
return executeQuery(
"""
SELECT EXISTS(
SELECT 1 FROM information_schema.schemata
WHERE schema_name = '$namespace'
)
"""
) { rs -> rs.next() && rs.getBoolean(1) }
}
override suspend fun tableExists(table: TableName): Boolean {
return executeQuery(
"""
SELECT EXISTS(
SELECT 1 FROM information_schema.tables
WHERE table_schema = '${table.namespace}'
AND table_name = '${table.name}'
)
"""
) { rs -> rs.next() && rs.getBoolean(1) }
}
override suspend fun createNamespace(namespace: String) {
try {
execute(sqlGenerator.createNamespace(namespace))
@@ -171,14 +203,26 @@ class PostgresAirbyteClient(
}
override suspend fun discoverSchema(tableName: TableName): TableSchema {
TODO("Not yet implemented")
val columnsInDb = getColumnsFromDbForDiscovery(tableName)
val hasAllAirbyteColumns = columnsInDb.keys.containsAll(COLUMN_NAMES)
if (!hasAllAirbyteColumns) {
val message =
"The target table ($tableName) already exists in the destination, but does not contain Airbyte's internal columns. Airbyte can only sync to Airbyte-controlled tables. To fix this error, you must either delete the target table or add a prefix in the connection configuration in order to sync to a separate table in the destination."
log.error { message }
throw ConfigErrorException(message)
}
// Filter out Airbyte columns
val userColumns = columnsInDb.filterKeys { it !in COLUMN_NAMES }
return TableSchema(userColumns)
}
override fun computeSchema(
stream: DestinationStream,
columnNameMapping: ColumnNameMapping
): TableSchema {
TODO("Not yet implemented")
return TableSchema(stream.tableSchema.columnSchema.finalSchema)
}
override suspend fun applyChangeset(
@@ -188,7 +232,71 @@ class PostgresAirbyteClient(
expectedColumns: TableColumns,
columnChangeset: ColumnChangeset
) {
TODO("Not yet implemented")
if (
columnChangeset.columnsToAdd.isNotEmpty() ||
columnChangeset.columnsToDrop.isNotEmpty() ||
columnChangeset.columnsToChange.isNotEmpty()
) {
log.info { "Summary of the table alterations:" }
log.info { "Added columns: ${columnChangeset.columnsToAdd}" }
log.info { "Deleted columns: ${columnChangeset.columnsToDrop}" }
log.info { "Modified columns: ${columnChangeset.columnsToChange}" }
// Convert from TableColumns format to Column format
val columnsToAdd =
columnChangeset.columnsToAdd
.map { (name, type) -> Column(name, type.type, type.nullable) }
.toSet()
val columnsToRemove =
columnChangeset.columnsToDrop
.map { (name, type) -> Column(name, type.type, type.nullable) }
.toSet()
val columnsToModify =
columnChangeset.columnsToChange
.map { (name, change) ->
Column(name, change.newType.type, change.newType.nullable)
}
.toSet()
val columnsInDb =
(columnChangeset.columnsToRetain +
columnChangeset.columnsToDrop +
columnChangeset.columnsToChange.mapValues { it.value.originalType })
.map { (name, type) -> Column(name, type.type, type.nullable) }
.toSet()
execute(
sqlGenerator.matchSchemas(
tableName = tableName,
columnsToAdd = columnsToAdd,
columnsToRemove = columnsToRemove,
columnsToModify = columnsToModify,
columnsInDb = columnsInDb,
recreatePrimaryKeyIndex = false,
primaryKeyColumnNames = emptyList(),
recreateCursorIndex = false,
cursorColumnName = null,
)
)
}
}
/**
* Gets columns from the database including their types for schema discovery. Unlike
* [getColumnsFromDb], this returns all columns including Airbyte metadata columns.
*/
private fun getColumnsFromDbForDiscovery(tableName: TableName): Map<String, ColumnType> =
executeQuery(sqlGenerator.getTableSchema(tableName)) { rs ->
val columnsInDb: MutableMap<String, ColumnType> = mutableMapOf()
while (rs.next()) {
val columnName = rs.getString(COLUMN_NAME_COLUMN)
val dataType = rs.getString("data_type")
// PostgreSQL's information_schema always returns 'YES' or 'NO' for is_nullable
val isNullable = rs.getString("is_nullable") == "YES"
columnsInDb[columnName] = ColumnType(normalizePostgresType(dataType), isNullable)
}
columnsInDb
}
/**

View File

@@ -13,8 +13,8 @@ import io.airbyte.cdk.integrations.util.PostgresSslConnectionUtils
import io.airbyte.cdk.load.check.CheckOperationV2
import io.airbyte.cdk.load.check.DestinationCheckerV2
import io.airbyte.cdk.load.dataflow.config.AggregatePublishingConfig
import io.airbyte.cdk.load.orchestration.db.DefaultTempTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.table.DefaultTempTableNameGenerator
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.output.OutputConsumer
import io.airbyte.cdk.ssh.SshConnectionOptions
import io.airbyte.cdk.ssh.SshKeyAuthTunnelMethod

View File

@@ -4,17 +4,17 @@
package io.airbyte.integrations.destination.postgres.config
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.orchestration.db.BaseDirectLoadInitialStatusGatherer
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.table.BaseDirectLoadInitialStatusGatherer
import jakarta.inject.Singleton
@Singleton
class PostgresDirectLoadDatabaseInitialStatusGatherer(
airbyteClient: TableOperationsClient,
tempTableNameGenerator: TempTableNameGenerator,
catalog: DestinationCatalog,
) :
BaseDirectLoadInitialStatusGatherer(
airbyteClient,
tempTableNameGenerator,
catalog,
)

View File

@@ -7,7 +7,7 @@ package io.airbyte.integrations.destination.postgres.dataflow
import io.airbyte.cdk.load.dataflow.aggregate.Aggregate
import io.airbyte.cdk.load.dataflow.aggregate.AggregateFactory
import io.airbyte.cdk.load.dataflow.aggregate.StoreKey
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.write.StreamStateStore
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration

View File

@@ -6,10 +6,10 @@ package io.airbyte.integrations.destination.postgres.db
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameGenerator
import io.airbyte.cdk.load.table.FinalTableNameGenerator
import io.airbyte.cdk.load.table.TypingDedupingUtil
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import jakarta.inject.Singleton
import java.util.Locale

View File

@@ -0,0 +1,96 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.schema
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.ArrayType
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.DateType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.load.table.TypingDedupingUtil
import io.airbyte.integrations.destination.postgres.db.toPostgresCompatibleName
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.airbyte.integrations.destination.postgres.sql.PostgresDataType
import jakarta.inject.Singleton
@Singleton
class PostgresTableSchemaMapper(
private val config: PostgresConfiguration,
private val tempTableNameGenerator: TempTableNameGenerator,
) : TableSchemaMapper {
override fun toFinalTableName(desc: DestinationStream.Descriptor): TableName {
val namespace = desc.namespace ?: config.schema
return if (!config.legacyRawTablesOnly) {
TableName(
namespace = namespace.toPostgresCompatibleName(),
name = desc.name.toPostgresCompatibleName(),
)
} else {
TableName(
namespace = config.internalTableSchema!!.lowercase().toPostgresCompatibleName(),
name =
TypingDedupingUtil.concatenateRawTableName(
namespace = namespace,
name = desc.name,
)
.lowercase()
.toPostgresCompatibleName(),
)
}
}
override fun toTempTableName(tableName: TableName): TableName {
return tempTableNameGenerator.generate(tableName)
}
override fun toColumnName(name: String): String {
return if (config.legacyRawTablesOnly) {
name
} else {
name.toPostgresCompatibleName()
}
}
override fun toColumnType(fieldType: FieldType): ColumnType {
val postgresType =
when (fieldType.type) {
BooleanType -> PostgresDataType.BOOLEAN.typeName
DateType -> PostgresDataType.DATE.typeName
IntegerType -> PostgresDataType.BIGINT.typeName
NumberType -> PostgresDataType.DECIMAL.typeName
StringType -> PostgresDataType.VARCHAR.typeName
TimeTypeWithTimezone -> PostgresDataType.TIME_WITH_TIMEZONE.typeName
TimeTypeWithoutTimezone -> PostgresDataType.TIME.typeName
TimestampTypeWithTimezone -> PostgresDataType.TIMESTAMP_WITH_TIMEZONE.typeName
TimestampTypeWithoutTimezone -> PostgresDataType.TIMESTAMP.typeName
is ArrayType,
ArrayTypeWithoutSchema,
is ObjectType,
ObjectTypeWithEmptySchema,
ObjectTypeWithoutSchema,
is UnknownType,
is UnionType -> PostgresDataType.JSONB.typeName
}
return ColumnType(postgresType, fieldType.nullable)
}
}
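// Rough illustration of toFinalTableName with hypothetical descriptors (not part of this change):
//
// // legacyRawTablesOnly = false: schema comes from the descriptor's namespace (falling back to
// // config.schema), table from the stream name, both made Postgres-compatible.
// toFinalTableName(desc with namespace = "sales", name = "orders")
// // -> TableName(namespace = "sales".toPostgresCompatibleName(), name = "orders".toPostgresCompatibleName())
//
// // legacyRawTablesOnly = true: everything lands in config.internalTableSchema, with the
// // namespace and stream name concatenated by TypingDedupingUtil.concatenateRawTableName and
// // lowercased before being made Postgres-compatible.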

View File

@@ -31,8 +31,8 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_LOADED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_DATA
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import jakarta.inject.Singleton
import kotlin.collections.plus

View File

@@ -9,9 +9,9 @@ import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.spec.CdcDeletionMode
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import jakarta.inject.Singleton
@@ -531,7 +531,7 @@ class PostgresDirectLoadSqlGenerator(
fun getTableSchema(tableName: TableName): String =
"""
SELECT column_name, data_type
SELECT column_name, data_type, is_nullable
FROM information_schema.columns
WHERE table_schema = '${tableName.namespace}'
AND table_name = '${tableName.name}';

View File

@@ -6,16 +6,17 @@ package io.airbyte.integrations.destination.postgres.write
import io.airbyte.cdk.SystemErrorException
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendTruncateStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupTruncateStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendTruncateStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupTruncateStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.write.DestinationWriter
import io.airbyte.cdk.load.write.StreamLoader
import io.airbyte.cdk.load.write.StreamStateStore
@@ -28,7 +29,7 @@ private val log = KotlinLogging.logger {}
@Singleton
class PostgresWriter(
private val names: TableCatalog,
private val catalog: DestinationCatalog,
private val stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>,
private val streamStateStore: StreamStateStore<DirectLoadTableExecutionConfig>,
private val postgresClient: PostgresAirbyteClient,
@@ -38,19 +39,20 @@ class PostgresWriter(
private lateinit var initialStatuses: Map<DestinationStream, DirectLoadInitialStatus>
override suspend fun setup() {
names.values
.map { (tableNames, _) -> tableNames.finalTableName!!.namespace }
catalog.streams
.map { it.tableSchema.tableNames.finalTableName!!.namespace }
.forEach { postgresClient.createNamespace(it) }
initialStatuses = stateGatherer.gatherInitialStatus(names)
initialStatuses = stateGatherer.gatherInitialStatus()
}
override fun createStreamLoader(stream: DestinationStream): StreamLoader {
val initialStatus = initialStatuses[stream]!!
val tableNameInfo = names[stream]!!
val realTableName = tableNameInfo.tableNames.finalTableName!!
val realTableName = stream.tableSchema.tableNames.finalTableName!!
val tempTableName = tempTableNameGenerator.generate(realTableName)
val columnNameMapping = tableNameInfo.columnNameMapping
val columnNameMapping =
ColumnNameMapping(stream.tableSchema.columnSchema.inputToFinalColumnNames)
val isRawTablesMode = postgresConfiguration.legacyRawTablesOnly == true
if (isRawTablesMode && stream.importType is Dedupe) {

View File

@@ -6,7 +6,7 @@ package io.airbyte.integrations.destination.postgres.write.load
import com.google.common.annotations.VisibleForTesting
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.github.oshai.kotlinlogging.KotlinLogging

View File

@@ -1,25 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.write.transform
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import jakarta.inject.Singleton
@Singleton
class PostgresColumnNameMapper(
private val catalogInfo: TableCatalog,
private val postgresConfiguration: PostgresConfiguration,
) : ColumnNameMapper {
override fun getMappedColumnName(stream: DestinationStream, columnName: String): String {
if (postgresConfiguration.legacyRawTablesOnly == true) {
return columnName
} else {
return catalogInfo.getMappedColumnName(stream, columnName)!!
}
}
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.component
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.integrations.destination.postgres.PostgresConfigUpdater
import io.airbyte.integrations.destination.postgres.PostgresContainerHelper
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.airbyte.integrations.destination.postgres.spec.PostgresConfigurationFactory
import io.airbyte.integrations.destination.postgres.spec.PostgresSpecificationOss
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Primary
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton
@Requires(env = ["component"])
@Factory
class PostgresComponentTestConfigFactory {
@Singleton
@Primary
fun config(): PostgresConfiguration {
// Start the postgres container
PostgresContainerHelper.start()
// Create a minimal config JSON and update it with container details
val configJson =
"""
{
"host": "replace_me_host",
"port": "replace_me_port",
"database": "replace_me_database",
"schema": "public",
"username": "replace_me_username",
"password": "replace_me_password",
"ssl": false
}
"""
val updatedConfig = PostgresConfigUpdater().update(configJson)
val spec = Jsons.readValue(updatedConfig, PostgresSpecificationOss::class.java)
return PostgresConfigurationFactory().makeWithoutExceptionHandling(spec)
}
}

View File

@@ -0,0 +1,36 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.component
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.component.TableOperationsFixtures
import io.airbyte.cdk.load.component.TableSchema
object PostgresComponentTestFixtures {
// PostgreSQL uses lowercase column names by default (no transformation needed)
val testMapping = TableOperationsFixtures.TEST_MAPPING
val idAndTestMapping = TableOperationsFixtures.ID_AND_TEST_MAPPING
val idTestWithCdcMapping = TableOperationsFixtures.ID_TEST_WITH_CDC_MAPPING
val allTypesTableSchema =
TableSchema(
mapOf(
"string" to ColumnType("varchar", true),
"boolean" to ColumnType("boolean", true),
"integer" to ColumnType("bigint", true),
"number" to ColumnType("decimal", true),
"date" to ColumnType("date", true),
"timestamp_tz" to ColumnType("timestamp with time zone", true),
"timestamp_ntz" to ColumnType("timestamp", true),
"time_tz" to ColumnType("time with time zone", true),
"time_ntz" to ColumnType("time", true),
"array" to ColumnType("jsonb", true),
"object" to ColumnType("jsonb", true),
"unknown" to ColumnType("jsonb", true),
)
)
val allTypesColumnNameMapping = TableOperationsFixtures.ALL_TYPES_MAPPING
}

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.component
import io.airbyte.cdk.load.component.TableOperationsFixtures
import io.airbyte.cdk.load.component.TableOperationsSuite
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.idTestWithCdcMapping
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.testMapping
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import jakarta.inject.Inject
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test
@MicronautTest(environments = ["component"])
class PostgresTableOperationsTest(
override val client: PostgresAirbyteClient,
override val testClient: PostgresTestTableOperationsClient,
) : TableOperationsSuite {
@Inject override lateinit var schemaFactory: TableSchemaFactory
@Test
override fun `connect to database`() {
super.`connect to database`()
}
@Test
override fun `create and drop namespaces`() {
super.`create and drop namespaces`()
}
@Test
override fun `create and drop tables`() {
super.`create and drop tables`()
}
@Test
override fun `insert records`() {
super.`insert records`(
inputRecords = TableOperationsFixtures.SINGLE_TEST_RECORD_INPUT,
expectedRecords = TableOperationsFixtures.SINGLE_TEST_RECORD_EXPECTED,
columnNameMapping = testMapping,
)
}
@Test
override fun `count table rows`() {
super.`count table rows`(columnNameMapping = testMapping)
}
@Test
override fun `overwrite tables`() {
super.`overwrite tables`(
sourceInputRecords = TableOperationsFixtures.OVERWRITE_SOURCE_RECORDS,
targetInputRecords = TableOperationsFixtures.OVERWRITE_TARGET_RECORDS,
expectedRecords = TableOperationsFixtures.OVERWRITE_EXPECTED_RECORDS,
columnNameMapping = testMapping,
)
}
@Test
override fun `copy tables`() {
super.`copy tables`(
sourceInputRecords = TableOperationsFixtures.OVERWRITE_SOURCE_RECORDS,
targetInputRecords = TableOperationsFixtures.OVERWRITE_TARGET_RECORDS,
expectedRecords = TableOperationsFixtures.COPY_EXPECTED_RECORDS,
columnNameMapping = testMapping,
)
}
@Test
override fun `get generation id`() {
super.`get generation id`(columnNameMapping = testMapping)
}
// TODO: Re-enable when CDK TableOperationsSuite is fixed to use ID_AND_TEST_SCHEMA for the target
// table instead of TEST_INTEGER_SCHEMA (Dedupe mode requires the id column as the primary key)
@Disabled("CDK TableOperationsSuite bug: target table schema missing 'id' column for Dedupe")
@Test
override fun `upsert tables`() {
super.`upsert tables`(
sourceInputRecords = TableOperationsFixtures.UPSERT_SOURCE_RECORDS,
targetInputRecords = TableOperationsFixtures.UPSERT_TARGET_RECORDS,
expectedRecords = TableOperationsFixtures.UPSERT_EXPECTED_RECORDS,
columnNameMapping = idTestWithCdcMapping,
)
}
}

View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.component
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.component.TableSchemaEvolutionFixtures
import io.airbyte.cdk.load.component.TableSchemaEvolutionSuite
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.allTypesColumnNameMapping
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.allTypesTableSchema
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.idAndTestMapping
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.testMapping
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import org.junit.jupiter.api.Test
@MicronautTest(environments = ["component"], resolveParameters = false)
class PostgresTableSchemaEvolutionTest(
override val client: PostgresAirbyteClient,
override val opsClient: PostgresAirbyteClient,
override val testClient: PostgresTestTableOperationsClient,
override val schemaFactory: TableSchemaFactory,
) : TableSchemaEvolutionSuite {
@Test
fun `discover recognizes all data types`() {
super.`discover recognizes all data types`(allTypesTableSchema, allTypesColumnNameMapping)
}
@Test
fun `computeSchema handles all data types`() {
super.`computeSchema handles all data types`(allTypesTableSchema, allTypesColumnNameMapping)
}
@Test
override fun `noop diff`() {
super.`noop diff`(testMapping)
}
@Test
override fun `changeset is correct when adding a column`() {
super.`changeset is correct when adding a column`(testMapping, idAndTestMapping)
}
@Test
override fun `changeset is correct when dropping a column`() {
super.`changeset is correct when dropping a column`(idAndTestMapping, testMapping)
}
@Test
override fun `changeset is correct when changing a column's type`() {
super.`changeset is correct when changing a column's type`(testMapping)
}
@Test
override fun `apply changeset - handle sync mode append`() {
super.`apply changeset - handle sync mode append`()
}
@Test
override fun `apply changeset - handle changing sync mode from append to dedup`() {
super.`apply changeset - handle changing sync mode from append to dedup`()
}
@Test
override fun `apply changeset - handle changing sync mode from dedup to append`() {
super.`apply changeset - handle changing sync mode from dedup to append`()
}
@Test
override fun `apply changeset - handle sync mode dedup`() {
super.`apply changeset - handle sync mode dedup`()
}
override fun `apply changeset`(
initialStreamImportType: ImportType,
modifiedStreamImportType: ImportType,
) {
super.`apply changeset`(
initialColumnNameMapping =
TableSchemaEvolutionFixtures.APPLY_CHANGESET_INITIAL_COLUMN_MAPPING,
modifiedColumnNameMapping =
TableSchemaEvolutionFixtures.APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING,
TableSchemaEvolutionFixtures.APPLY_CHANGESET_EXPECTED_EXTRACTED_AT,
initialStreamImportType,
modifiedStreamImportType,
)
}
@Test
override fun `change from string type to unknown type`() {
super.`change from string type to unknown type`(
idAndTestMapping,
idAndTestMapping,
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS,
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS,
)
}
@Test
override fun `change from unknown type to string type`() {
super.`change from unknown type to string type`(
idAndTestMapping,
idAndTestMapping,
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS,
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS,
)
}
}

View File

@@ -0,0 +1,257 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.integrations.destination.postgres.component
import io.airbyte.cdk.load.component.TestTableOperationsClient
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton
import java.time.OffsetDateTime
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import javax.sql.DataSource
@Requires(env = ["component"])
@Singleton
class PostgresTestTableOperationsClient(
private val dataSource: DataSource,
private val client: PostgresAirbyteClient,
) : TestTableOperationsClient {
override suspend fun ping() {
dataSource.connection.use { connection ->
connection.createStatement().use { statement -> statement.executeQuery("SELECT 1") }
}
}
override suspend fun dropNamespace(namespace: String) {
dataSource.connection.use { connection ->
connection.createStatement().use { statement ->
statement.execute("DROP SCHEMA IF EXISTS \"$namespace\" CASCADE")
}
}
}
override suspend fun insertRecords(table: TableName, records: List<Map<String, AirbyteValue>>) {
if (records.isEmpty()) return
// Get column types from database to handle jsonb columns properly
val columnTypes = getColumnTypes(table)
// Get all unique columns from ALL records to handle sparse data (e.g., CDC deletion column)
val columns = records.flatMap { it.keys }.distinct().toList()
val columnNames = columns.joinToString(", ") { "\"$it\"" }
val placeholders = columns.indices.joinToString(", ") { "?" }
val sql =
"""
INSERT INTO "${table.namespace}"."${table.name}" ($columnNames)
VALUES ($placeholders)
"""
dataSource.connection.use { connection ->
connection.prepareStatement(sql).use { statement ->
for (record in records) {
columns.forEachIndexed { index, column ->
val value = record[column]
val columnType = columnTypes[column]
setParameterValue(statement, index + 1, value, columnType)
}
statement.addBatch()
}
statement.executeBatch()
}
}
}
private fun getColumnTypes(table: TableName): Map<String, String> {
val columnTypes = mutableMapOf<String, String>()
dataSource.connection.use { connection ->
connection.createStatement().use { statement ->
statement
.executeQuery(
"""
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = '${table.namespace}'
AND table_name = '${table.name}'
"""
)
.use { resultSet ->
while (resultSet.next()) {
columnTypes[resultSet.getString("column_name")] =
resultSet.getString("data_type")
}
}
}
}
return columnTypes
}
private fun setParameterValue(
statement: java.sql.PreparedStatement,
index: Int,
value: AirbyteValue?,
columnType: String?
) {
// If column is jsonb, serialize any value as JSON
if (columnType == "jsonb") {
if (value == null || value is io.airbyte.cdk.load.data.NullValue) {
statement.setNull(index, java.sql.Types.OTHER)
} else {
val pgObject = org.postgresql.util.PGobject()
pgObject.type = "jsonb"
pgObject.value = serializeToJson(value)
statement.setObject(index, pgObject)
}
return
}
when (value) {
null,
is io.airbyte.cdk.load.data.NullValue -> statement.setNull(index, java.sql.Types.NULL)
is io.airbyte.cdk.load.data.StringValue -> statement.setString(index, value.value)
is io.airbyte.cdk.load.data.IntegerValue ->
statement.setLong(index, value.value.toLong())
is io.airbyte.cdk.load.data.NumberValue -> statement.setBigDecimal(index, value.value)
is io.airbyte.cdk.load.data.BooleanValue -> statement.setBoolean(index, value.value)
is io.airbyte.cdk.load.data.TimestampWithTimezoneValue -> {
val offsetDateTime = OffsetDateTime.parse(value.value.toString())
statement.setObject(index, offsetDateTime)
}
is io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue -> {
val localDateTime = java.time.LocalDateTime.parse(value.value.toString())
statement.setObject(index, localDateTime)
}
is io.airbyte.cdk.load.data.DateValue -> {
val localDate = java.time.LocalDate.parse(value.value.toString())
statement.setObject(index, localDate)
}
is io.airbyte.cdk.load.data.TimeWithTimezoneValue -> {
statement.setString(index, value.value.toString())
}
is io.airbyte.cdk.load.data.TimeWithoutTimezoneValue -> {
val localTime = java.time.LocalTime.parse(value.value.toString())
statement.setObject(index, localTime)
}
is io.airbyte.cdk.load.data.ObjectValue -> {
val pgObject = org.postgresql.util.PGobject()
pgObject.type = "jsonb"
pgObject.value = Jsons.writeValueAsString(value.values)
statement.setObject(index, pgObject)
}
is io.airbyte.cdk.load.data.ArrayValue -> {
val pgObject = org.postgresql.util.PGobject()
pgObject.type = "jsonb"
pgObject.value = Jsons.writeValueAsString(value.values)
statement.setObject(index, pgObject)
}
else -> {
// For unknown types, try to serialize as string
statement.setString(index, value.toString())
}
}
}
private fun serializeToJson(value: AirbyteValue): String {
return when (value) {
is io.airbyte.cdk.load.data.StringValue -> Jsons.writeValueAsString(value.value)
is io.airbyte.cdk.load.data.IntegerValue -> value.value.toString()
is io.airbyte.cdk.load.data.NumberValue -> value.value.toString()
is io.airbyte.cdk.load.data.BooleanValue -> value.value.toString()
is io.airbyte.cdk.load.data.ObjectValue -> Jsons.writeValueAsString(value.values)
is io.airbyte.cdk.load.data.ArrayValue -> Jsons.writeValueAsString(value.values)
is io.airbyte.cdk.load.data.NullValue -> "null"
else -> Jsons.writeValueAsString(value.toString())
}
}
override suspend fun readTable(table: TableName): List<Map<String, Any>> {
dataSource.connection.use { connection ->
connection.createStatement().use { statement ->
statement
.executeQuery("""SELECT * FROM "${table.namespace}"."${table.name}"""")
.use { resultSet ->
val metaData = resultSet.metaData
val columnCount = metaData.columnCount
val result = mutableListOf<Map<String, Any>>()
while (resultSet.next()) {
val row = mutableMapOf<String, Any>()
for (i in 1..columnCount) {
val columnName = metaData.getColumnName(i)
val columnType = metaData.getColumnTypeName(i)
when (columnType.lowercase()) {
"timestamptz" -> {
val value =
resultSet.getObject(i, OffsetDateTime::class.java)
if (value != null) {
val formattedTimestamp =
DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(
value.withOffsetSameInstant(ZoneOffset.UTC)
)
row[columnName] = formattedTimestamp
}
}
"timestamp" -> {
val value = resultSet.getTimestamp(i)
if (value != null) {
val localDateTime = value.toLocalDateTime()
row[columnName] =
DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(
localDateTime
)
}
}
"jsonb",
"json" -> {
val stringValue: String? = resultSet.getString(i)
if (stringValue != null) {
val parsedValue =
Jsons.readValue(stringValue, Any::class.java)
val actualValue =
when (parsedValue) {
is Int -> parsedValue.toLong()
else -> parsedValue
}
row[columnName] = actualValue
}
}
else -> {
val value = resultSet.getObject(i)
if (value != null) {
// For varchar columns that may contain JSON (from schema evolution),
// normalize the JSON to compact format for comparison
if (
value is String &&
(value.startsWith("{") || value.startsWith("["))
) {
try {
val parsed =
Jsons.readValue(value, Any::class.java)
row[columnName] =
Jsons.writeValueAsString(parsed)
} catch (_: Exception) {
row[columnName] = value
}
} else {
row[columnName] = value
}
}
}
}
}
result.add(row)
}
return result
}
}
}
}
}

View File

@@ -31,7 +31,7 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
import io.airbyte.cdk.load.table.TypingDedupingUtil
import io.airbyte.cdk.load.test.util.DestinationDataDumper
import io.airbyte.cdk.load.test.util.OutputRecord
import io.airbyte.cdk.load.util.deserializeToNode
@@ -267,7 +267,7 @@ class PostgresRawDataDumper(
.lowercase()
.toPostgresCompatibleName()
val fullyQualifiedTableName = "$rawNamespace.$rawName"
val fullyQualifiedTableName = "\"$rawNamespace\".\"$rawName\""
// Check if table exists first
val tableExistsQuery =
@@ -302,6 +302,26 @@ class PostgresRawDataDumper(
false
}
// Build the column name mapping from original names to transformed names
// We use the stream schema to get the original field names, then transform them
// using the postgres name transformation logic
val finalToInputColumnNames = mutableMapOf<String, String>()
if (stream.schema is ObjectType) {
val objectSchema = stream.schema as ObjectType
for (fieldName in objectSchema.properties.keys) {
val transformedName = fieldName.toPostgresCompatibleName()
// Map transformed name back to original name
finalToInputColumnNames[transformedName] = fieldName
}
}
// Also check if inputToFinalColumnNames mapping is available
val inputToFinalColumnNames =
stream.tableSchema.columnSchema.inputToFinalColumnNames
// Add entries from the existing mapping (in case it was populated)
for ((input, final) in inputToFinalColumnNames) {
finalToInputColumnNames[final] = input
}
while (resultSet.next()) {
val rawData =
if (hasDataColumn) {
@@ -313,8 +333,22 @@ class PostgresRawDataDumper(
else -> dataObject?.toString() ?: "{}"
}
// Parse JSON to AirbyteValue, then coerce it to match the schema
// Parse JSON to AirbyteValue, then map column names back to originals
val parsedValue =
dataJson?.deserializeToNode()?.toAirbyteValue() ?: NullValue
// If the parsed value is an ObjectValue, map the column names back
if (parsedValue is ObjectValue) {
val mappedProperties = linkedMapOf<String, AirbyteValue>()
for ((key, value) in parsedValue.values) {
// Map final column name back to input column name if mapping
// exists
val originalKey = finalToInputColumnNames[key] ?: key
mappedProperties[originalKey] = value
}
ObjectValue(mappedProperties)
} else {
parsedValue
}
} else {
// Typed table mode: read from individual columns and reconstruct the
// object
@@ -333,10 +367,19 @@ class PostgresRawDataDumper(
for ((fieldName, fieldType) in objectSchema.properties) {
try {
// Map input field name to the transformed final column name
// First check the inputToFinalColumnNames mapping, then fall
// back to applying postgres transformation directly
val transformedColumnName =
inputToFinalColumnNames[fieldName]
?: fieldName.toPostgresCompatibleName()
// Try to find the actual column name (case-insensitive
// lookup)
val actualColumnName =
columnMap[fieldName.lowercase()] ?: fieldName
columnMap[transformedColumnName.lowercase()]
?: transformedColumnName
val columnValue = resultSet.getObject(actualColumnName)
properties[fieldName] =
when (columnValue) {

View File

@@ -5,8 +5,8 @@
package io.airbyte.integrations.destination.postgres.check
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.mockk.coEvery

View File

@@ -6,8 +6,8 @@ package io.airbyte.integrations.destination.postgres.client
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.airbyte.integrations.destination.postgres.sql.COUNT_TOTAL_ALIAS
import io.airbyte.integrations.destination.postgres.sql.Column

View File

@@ -12,9 +12,9 @@ import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.integrations.destination.postgres.spec.CdcDeletionMode
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
import io.mockk.every

View File

@@ -5,18 +5,19 @@
package io.airbyte.integrations.destination.postgres.write
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupStreamLoader
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableNameInfo
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.write.StreamStateStore
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
@@ -33,7 +34,7 @@ import org.junit.jupiter.api.Test
class PostgresWriterTest {
private lateinit var writer: PostgresWriter
private lateinit var names: TableCatalog
private lateinit var catalog: DestinationCatalog
private lateinit var stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>
private lateinit var streamStateStore: StreamStateStore<DirectLoadTableExecutionConfig>
private lateinit var postgresClient: PostgresAirbyteClient
@@ -42,7 +43,7 @@ class PostgresWriterTest {
@BeforeEach
fun setup() {
names = mockk()
catalog = mockk()
stateGatherer = mockk()
streamStateStore = mockk()
postgresClient = mockk()
@@ -51,7 +52,7 @@ class PostgresWriterTest {
writer =
PostgresWriter(
names,
catalog,
stateGatherer,
streamStateStore,
postgresClient,
@@ -66,27 +67,28 @@ class PostgresWriterTest {
val stream = mockk<DestinationStream>()
val finalTableName = TableName("ns", "name")
val mapping = mockk<ColumnNameMapping>(relaxed = true)
val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
every { tableNameInfo.tableNames.finalTableName } returns finalTableName
every { tableNameInfo.columnNameMapping } returns mapping
every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
val tableNames = TableNames(finalTableName = finalTableName)
val columnSchema =
ColumnSchema(
inputSchema = emptyMap(),
inputToFinalColumnNames = emptyMap(),
finalSchema = emptyMap()
)
val importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
val tableSchema = StreamTableSchema(tableNames, columnSchema, importType)
every { stream.importType } returns Dedupe(primaryKey = emptyList(), cursor = emptyList())
every { stream.tableSchema } returns tableSchema
every { stream.importType } returns importType
every { stream.minimumGenerationId } returns 0L
every { stream.generationId } returns 1L
// Mock names map behavior
val namesMap = mapOf(stream to tableNameInfo)
every { names.values } returns namesMap.values
every { names[stream] } returns tableNameInfo
every { catalog.streams } returns listOf(stream)
coEvery { postgresClient.createNamespace(any()) } just Runs
val initialStatus = mockk<DirectLoadInitialStatus>()
coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
every { tempTableNameGenerator.generate(finalTableName) } returns
TableName("ns", "temp_name")
@@ -103,27 +105,28 @@ class PostgresWriterTest {
val stream = mockk<DestinationStream>()
val finalTableName = TableName("ns", "name")
val mapping = mockk<ColumnNameMapping>(relaxed = true)
val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
every { tableNameInfo.tableNames.finalTableName } returns finalTableName
every { tableNameInfo.columnNameMapping } returns mapping
every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
val tableNames = TableNames(finalTableName = finalTableName)
val columnSchema =
ColumnSchema(
inputSchema = emptyMap(),
inputToFinalColumnNames = emptyMap(),
finalSchema = emptyMap()
)
val importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
val tableSchema = StreamTableSchema(tableNames, columnSchema, importType)
every { stream.importType } returns Dedupe(primaryKey = emptyList(), cursor = emptyList())
every { stream.tableSchema } returns tableSchema
every { stream.importType } returns importType
every { stream.minimumGenerationId } returns 0L
every { stream.generationId } returns 1L
// Mock names map behavior
val namesMap = mapOf(stream to tableNameInfo)
every { names.values } returns namesMap.values
every { names[stream] } returns tableNameInfo
every { catalog.streams } returns listOf(stream)
coEvery { postgresClient.createNamespace(any()) } just Runs
val initialStatus = mockk<DirectLoadInitialStatus>()
coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
every { tempTableNameGenerator.generate(finalTableName) } returns
TableName("ns", "temp_name")
@@ -143,29 +146,29 @@ class PostgresWriterTest {
val stream = mockk<DestinationStream>()
val finalTableName = TableName("ns", "name")
val mapping = mockk<ColumnNameMapping>(relaxed = true)
val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
every { tableNameInfo.tableNames.finalTableName } returns finalTableName
every { tableNameInfo.columnNameMapping } returns mapping
every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
val tableNames = TableNames(finalTableName = finalTableName)
val columnSchema =
ColumnSchema(
inputSchema = emptyMap(),
inputToFinalColumnNames = emptyMap(),
finalSchema = emptyMap()
)
// Use a mock for ImportType that is NOT Dedupe
val appendImportType = mockk<ImportType>()
val tableSchema = StreamTableSchema(tableNames, columnSchema, appendImportType)
every { stream.tableSchema } returns tableSchema
every { stream.importType } returns appendImportType
every { stream.minimumGenerationId } returns 0L
every { stream.generationId } returns 1L
// Mock names map behavior
val namesMap = mapOf(stream to tableNameInfo)
every { names.values } returns namesMap.values
every { names[stream] } returns tableNameInfo
every { catalog.streams } returns listOf(stream)
coEvery { postgresClient.createNamespace(any()) } just Runs
val initialStatus = mockk<DirectLoadInitialStatus>()
coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
every { tempTableNameGenerator.generate(finalTableName) } returns
TableName("ns", "temp_name")

View File

@@ -580,12 +580,37 @@ dynamic_streams:
- table_id
value: "{{ components_values.id }}"
# Rate limits: https://airtable.com/developers/web/api/rate-limits
# - 5 requests per second per base
# - 50 requests per second per user/service account
api_budget:
type: HTTPAPIBudget
policies:
- type: MovingWindowCallRatePolicy
rates:
- limit: 5
interval: PT1S
matchers: [] # Applies to all endpoints
status_codes_for_ratelimit_hit: [429]
concurrency_level:
type: ConcurrencyLevel
default_concurrency: "{{ config.get('num_workers', 5) }}"
max_concurrency: 40
spec:
type: Spec
connection_specification:
type: object
$schema: http://json-schema.org/draft-07/schema#
properties:
num_workers:
type: integer
title: Number of Concurrent Workers
description: Number of concurrent threads for syncing. Higher values can speed up syncs but may hit rate limits; Airtable limits each base to 5 requests per second.
default: 5
minimum: 2
maximum: 40
credentials:
title: Authentication
type: object

View File

@@ -11,7 +11,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: 14c6e7ea-97ed-4f5e-a7b5-25e9a80b8212
dockerImageTag: 4.6.15
dockerImageTag: 4.6.16-rc.1
dockerRepository: airbyte/source-airtable
documentationUrl: https://docs.airbyte.com/integrations/sources/airtable
externalDocumentationUrls:
@@ -53,7 +53,7 @@ data:
message: This release introduces changes to columns with formula to parse values directly from `array` to `string` or `number` (where it is possible). Users should refresh the source schema and reset affected streams after upgrading to ensure uninterrupted syncs.
upgradeDeadline: "2023-10-23"
rolloutConfiguration:
enableProgressiveRollout: false
enableProgressiveRollout: true
supportLevel: certified
tags:
- language:manifest-only

View File

@@ -3487,7 +3487,7 @@ spec:
type: integer
title: Number of concurrent workers
minimum: 2
maximum: 10
maximum: 20
default: 10
examples:
- 2
@@ -3547,7 +3547,7 @@ spec:
concurrency_level:
type: ConcurrencyLevel
default_concurrency: "{{ config.get('num_workers', 10) }}"
max_concurrency: 10
max_concurrency: 20
schemas:
attribution_report_performance_adgroup:

View File

@@ -13,7 +13,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: c6b0a29e-1da9-4512-9002-7bfd0cba2246
dockerImageTag: 7.3.8
dockerImageTag: 7.3.9
dockerRepository: airbyte/source-amazon-ads
documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-ads
githubIssueLabel: source-amazon-ads

View File

@@ -854,7 +854,6 @@ definitions:
name: ListFinancialEventGroups
primary_key:
- FinancialEventGroupId
ignore_stream_slicer_parameters_on_paginated_requests: false
incremental_sync:
type: DatetimeBasedCursor
cursor_field: FinancialEventGroupStart
@@ -881,6 +880,7 @@ definitions:
cursor_granularity: "PT1S"
retriever:
type: SimpleRetriever
ignore_stream_slicer_parameters_on_paginated_requests: true
requester:
$ref: "#/definitions/base_requester"
path: "finances/v0/financialEventGroups"
@@ -929,7 +929,6 @@ definitions:
type: DeclarativeStream
name: ListFinancialEvents
primary_key: []
ignore_stream_slicer_parameters_on_paginated_requests: false
incremental_sync:
type: DatetimeBasedCursor
cursor_field: PostedBefore
@@ -961,6 +960,7 @@ definitions:
value: "{{ stream_slice['end_time'] }}"
retriever:
type: SimpleRetriever
ignore_stream_slicer_parameters_on_paginated_requests: true
requester:
$ref: "#/definitions/base_requester"
path: "finances/v0/financialEvents"

View File

@@ -15,7 +15,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460
dockerImageTag: 5.0.0
dockerImageTag: 5.0.1
dockerRepository: airbyte/source-amazon-seller-partner
documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner
erdUrl: https://dbdocs.io/airbyteio/source-amazon-seller-partner?view=relationships

View File

@@ -28,7 +28,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: d0243522-dccf-4978-8ba0-37ed47a0bdbf
dockerImageTag: 1.5.0
dockerImageTag: 1.5.1
dockerRepository: airbyte/source-asana
githubIssueLabel: source-asana
icon: asana.svg

View File

@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"
[tool.poetry]
version = "1.5.0"
version = "1.5.1"
name = "source-asana"
description = "Source implementation for asana."
authors = [ "Airbyte <contact@airbyte.io>",]
@@ -12,7 +12,8 @@ readme = "README.md"
documentation = "https://docs.airbyte.com/integrations/sources/asana"
homepage = "https://airbyte.com"
repository = "https://github.com/airbytehq/airbyte"
packages = [ { include = "source_asana" }, {include = "main.py" } ]
[[tool.poetry.packages]]
include = "source_asana"
[tool.poetry.dependencies]
python = "^3.10,<3.12"

View File

@@ -2641,6 +2641,8 @@ spec:
title: Organization Export IDs
description: Globally unique identifiers for the organization exports
type: array
items:
type: string
num_workers:
type: integer
title: Number of concurrent workers

View File

@@ -1,3 +1,46 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
import os
import sys
from pathlib import Path
from typing import Any, Mapping
from pytest import fixture
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
from airbyte_cdk.test.catalog_builder import CatalogBuilder
from airbyte_cdk.test.state_builder import StateBuilder
pytest_plugins = ["airbyte_cdk.test.utils.manifest_only_fixtures"]
os.environ["REQUEST_CACHE_PATH"] = "REQUEST_CACHE_PATH"
def _get_manifest_path() -> Path:
"""Get path to manifest.yaml, handling both CI and local environments."""
ci_path = Path("/airbyte/integration_code/source_declarative_manifest")
if ci_path.exists():
return ci_path
return Path(__file__).parent.parent
_SOURCE_FOLDER_PATH = _get_manifest_path()
_YAML_FILE_PATH = _SOURCE_FOLDER_PATH / "manifest.yaml"
sys.path.append(str(_SOURCE_FOLDER_PATH))
def get_source(config: Mapping[str, Any], state=None) -> YamlDeclarativeSource:
"""Create a YamlDeclarativeSource instance with the given config."""
catalog = CatalogBuilder().build()
state = StateBuilder().build() if not state else state
return YamlDeclarativeSource(path_to_yaml=str(_YAML_FILE_PATH), catalog=catalog, config=config, state=state)
@fixture(autouse=True)
def clear_cache_before_each_test():
"""CRITICAL: Clear HTTP request cache between tests to ensure isolation."""
cache_dir = Path(os.getenv("REQUEST_CACHE_PATH"))
if cache_dir.exists() and cache_dir.is_dir():
for file_path in cache_dir.glob("*.sqlite"):
file_path.unlink()
yield
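# Hedged usage sketch (illustrative only; the config values below are assumptions,
# not a real Chargebee site):
#
#     source = get_source(
#         config={
#             "site_api_key": "key",
#             "site": "test-site",
#             "start_date": "2024-01-01T00:00:00Z",
#             "product_catalog": "2.0",
#         }
#     )
#
# The stream tests in this directory drive such a source through the shared
# read_output helper (see the .utils import in the test modules below).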

View File

@@ -0,0 +1 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

View File

@@ -0,0 +1,43 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from __future__ import annotations
from typing import Any, MutableMapping
# Constants for test data - match connector's spec
SITE_API_KEY = "test_api_key_12345"
SITE = "test-site"
START_DATE = "2024-01-01T00:00:00Z"
PRODUCT_CATALOG = "2.0"
class ConfigBuilder:
"""Builder for creating test configurations matching connector spec."""
def __init__(self) -> None:
self._config: MutableMapping[str, Any] = {
"site_api_key": SITE_API_KEY,
"site": SITE,
"start_date": START_DATE,
"product_catalog": PRODUCT_CATALOG,
}
def with_site_api_key(self, site_api_key: str) -> "ConfigBuilder":
self._config["site_api_key"] = site_api_key
return self
def with_site(self, site: str) -> "ConfigBuilder":
self._config["site"] = site
return self
def with_start_date(self, start_date: str) -> "ConfigBuilder":
self._config["start_date"] = start_date
return self
def with_product_catalog(self, product_catalog: str) -> "ConfigBuilder":
self._config["product_catalog"] = product_catalog
return self
def build(self) -> MutableMapping[str, Any]:
return self._config
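# Hedged usage sketch (the overridden values are illustrative assumptions):
#
#     config = ConfigBuilder().with_site("my-test-site").with_product_catalog("1.0").build()
#     assert config["site"] == "my-test-site"
#     assert config["site_api_key"] == SITE_API_KEY  # defaults are kept unless overridden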

View File

@@ -0,0 +1,183 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from __future__ import annotations
from typing import Any, Dict, Optional
from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest
from .config import SITE
# Must match manifest.yaml base URL exactly
API_BASE_URL = f"https://{SITE}.chargebee.com/api/v2"
class RequestBuilder:
"""Builder for creating HttpRequest objects for testing."""
@classmethod
def endpoint(cls, resource: str) -> "RequestBuilder":
return cls(resource)
@classmethod
def customers_endpoint(cls) -> "RequestBuilder":
return cls(resource="customers")
@classmethod
def customer_contacts_endpoint(cls, customer_id: str) -> "RequestBuilder":
return cls(resource=f"customers/{customer_id}/contacts")
@classmethod
def subscriptions_endpoint(cls) -> "RequestBuilder":
return cls(resource="subscriptions")
@classmethod
def invoices_endpoint(cls) -> "RequestBuilder":
return cls(resource="invoices")
@classmethod
def events_endpoint(cls) -> "RequestBuilder":
return cls(resource="events")
@classmethod
def transactions_endpoint(cls) -> "RequestBuilder":
return cls(resource="transactions")
@classmethod
def plans_endpoint(cls) -> "RequestBuilder":
return cls(resource="plans")
@classmethod
def addons_endpoint(cls) -> "RequestBuilder":
return cls(resource="addons")
@classmethod
def coupons_endpoint(cls) -> "RequestBuilder":
return cls(resource="coupons")
@classmethod
def items_endpoint(cls) -> "RequestBuilder":
return cls(resource="items")
@classmethod
def item_attached_items_endpoint(cls, item_id: str) -> "RequestBuilder":
return cls(resource=f"items/{item_id}/attached_items")
@classmethod
def gifts_endpoint(cls) -> "RequestBuilder":
return cls(resource="gifts")
@classmethod
def credit_notes_endpoint(cls) -> "RequestBuilder":
return cls(resource="credit_notes")
@classmethod
def orders_endpoint(cls) -> "RequestBuilder":
return cls(resource="orders")
@classmethod
def hosted_pages_endpoint(cls) -> "RequestBuilder":
return cls(resource="hosted_pages")
@classmethod
def item_prices_endpoint(cls) -> "RequestBuilder":
return cls(resource="item_prices")
@classmethod
def payment_sources_endpoint(cls) -> "RequestBuilder":
return cls(resource="payment_sources")
@classmethod
def promotional_credits_endpoint(cls) -> "RequestBuilder":
return cls(resource="promotional_credits")
@classmethod
def subscription_scheduled_changes_endpoint(cls, subscription_id: str) -> "RequestBuilder":
return cls(resource=f"subscriptions/{subscription_id}/retrieve_with_scheduled_changes")
@classmethod
def unbilled_charges_endpoint(cls) -> "RequestBuilder":
return cls(resource="unbilled_charges")
@classmethod
def virtual_bank_accounts_endpoint(cls) -> "RequestBuilder":
return cls(resource="virtual_bank_accounts")
@classmethod
def quotes_endpoint(cls) -> "RequestBuilder":
return cls(resource="quotes")
@classmethod
def quote_line_groups_endpoint(cls, quote_id: str) -> "RequestBuilder":
return cls(resource=f"quotes/{quote_id}/quote_line_groups")
@classmethod
def site_migration_details_endpoint(cls) -> "RequestBuilder":
return cls(resource="site_migration_details")
@classmethod
def comments_endpoint(cls) -> "RequestBuilder":
return cls(resource="comments")
@classmethod
def item_families_endpoint(cls) -> "RequestBuilder":
return cls(resource="item_families")
@classmethod
def differential_prices_endpoint(cls) -> "RequestBuilder":
return cls(resource="differential_prices")
def __init__(self, resource: str = "") -> None:
self._resource = resource
self._query_params: Dict[str, Any] = {}
self._any_query_params = False
def with_query_param(self, key: str, value: Any) -> "RequestBuilder":
self._query_params[key] = value
return self
def with_limit(self, limit: int) -> "RequestBuilder":
self._query_params["limit"] = str(limit)
return self
def with_offset(self, offset: str) -> "RequestBuilder":
self._query_params["offset"] = offset
return self
def with_any_query_params(self) -> "RequestBuilder":
"""Use for endpoints with dynamic query params."""
self._any_query_params = True
return self
def with_sort_by_asc(self, field: str) -> "RequestBuilder":
"""Add sort_by[asc] parameter."""
self._query_params["sort_by[asc]"] = field
return self
def with_include_deleted(self, value: str = "true") -> "RequestBuilder":
"""Add include_deleted parameter."""
self._query_params["include_deleted"] = value
return self
def with_updated_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
"""Add updated_at[between] parameter for incremental streams."""
self._query_params["updated_at[between]"] = f"[{start_time}, {end_time}]"
return self
def with_occurred_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
"""Add occurred_at[between] parameter for event stream."""
self._query_params["occurred_at[between]"] = f"[{start_time}, {end_time}]"
return self
def with_created_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
"""Add created_at[between] parameter for comment and promotional_credit streams."""
self._query_params["created_at[between]"] = f"[{start_time}, {end_time}]"
return self
def build(self) -> HttpRequest:
query_params = ANY_QUERY_PARAMS if self._any_query_params else (self._query_params if self._query_params else None)
return HttpRequest(
url=f"{API_BASE_URL}/{self._resource}",
query_params=query_params,
)
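# Hedged usage sketch ("cust_001" is a made-up customer id): chained builder calls
# produce an HttpRequest that HttpMocker can match against.
#
#     request = (
#         RequestBuilder.customer_contacts_endpoint("cust_001")
#         .with_sort_by_asc("updated_at")
#         .with_limit(100)
#         .build()
#     )
#     # -> GET https://test-site.chargebee.com/api/v2/customers/cust_001/contacts
#     #    with query params sort_by[asc]=updated_at and limit=100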

View File

@@ -0,0 +1,233 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from http import HTTPStatus
from pathlib import Path
from airbyte_cdk.test.mock_http import HttpResponse
def _get_response_path() -> Path:
"""Get path to response JSON files."""
return Path(__file__).parent.parent / "resource" / "http" / "response"
def get_json_response(filename: str) -> str:
"""Load a JSON response from the resource directory."""
response_path = _get_response_path() / filename
return response_path.read_text()
def json_response(filename: str, status_code: HTTPStatus = HTTPStatus.OK) -> HttpResponse:
"""Create an HttpResponse from a JSON file."""
body = get_json_response(filename)
return HttpResponse(body=body, status_code=status_code.value, headers={})
def customer_response() -> HttpResponse:
"""Customer stream response."""
return json_response("customer.json")
def customer_response_page1() -> HttpResponse:
"""Customer stream response - page 1 with next_offset."""
return json_response("customer_page1.json")
def customer_response_page2() -> HttpResponse:
"""Customer stream response - page 2 (last page)."""
return json_response("customer_page2.json")
def customer_response_multiple() -> HttpResponse:
"""Customer stream response with multiple records."""
return json_response("customer_multiple.json")
def subscription_response() -> HttpResponse:
"""Subscription stream response."""
return json_response("subscription.json")
def subscription_response_page1() -> HttpResponse:
"""Subscription stream response - page 1 with next_offset."""
return json_response("subscription_page1.json")
def subscription_response_page2() -> HttpResponse:
"""Subscription stream response - page 2 (last page)."""
return json_response("subscription_page2.json")
def invoice_response() -> HttpResponse:
"""Invoice stream response."""
return json_response("invoice.json")
def event_response() -> HttpResponse:
"""Event stream response."""
return json_response("event.json")
def event_response_page1() -> HttpResponse:
"""Event stream response - page 1 with next_offset."""
return json_response("event_page1.json")
def event_response_page2() -> HttpResponse:
"""Event stream response - page 2 (last page)."""
return json_response("event_page2.json")
def transaction_response() -> HttpResponse:
"""Transaction stream response."""
return json_response("transaction.json")
def plan_response() -> HttpResponse:
"""Plan stream response."""
return json_response("plan.json")
def addon_response() -> HttpResponse:
"""Addon stream response."""
return json_response("addon.json")
def coupon_response() -> HttpResponse:
"""Coupon stream response."""
return json_response("coupon.json")
def credit_note_response() -> HttpResponse:
"""Credit note stream response."""
return json_response("credit_note.json")
def gift_response() -> HttpResponse:
"""Gift stream response."""
return json_response("gift.json")
def item_response() -> HttpResponse:
"""Item stream response."""
return json_response("item.json")
def item_response_multiple() -> HttpResponse:
"""Item stream response with multiple records."""
return json_response("item_multiple.json")
def contact_response() -> HttpResponse:
"""Contact stream response (substream of customer)."""
return json_response("contact.json")
def attached_item_response() -> HttpResponse:
"""Attached item stream response (substream of item)."""
return json_response("attached_item.json")
def empty_response() -> HttpResponse:
"""Empty response with no records."""
return json_response("empty.json")
def error_response(status_code: HTTPStatus = HTTPStatus.UNAUTHORIZED) -> HttpResponse:
"""Error response for testing error handling."""
error_files = {
HTTPStatus.UNAUTHORIZED: "error_unauthorized.json",
HTTPStatus.NOT_FOUND: "error_not_found.json",
}
filename = error_files.get(status_code, "error_unauthorized.json")
return json_response(filename, status_code)
def configuration_incompatible_response() -> HttpResponse:
"""Response for configuration_incompatible error (IGNORE action)."""
return json_response("error_configuration_incompatible.json", HTTPStatus.BAD_REQUEST)
def order_response() -> HttpResponse:
"""Order stream response."""
return json_response("order.json")
def hosted_page_response() -> HttpResponse:
"""Hosted page stream response."""
return json_response("hosted_page.json")
def item_price_response() -> HttpResponse:
"""Item price stream response."""
return json_response("item_price.json")
def payment_source_response() -> HttpResponse:
"""Payment source stream response."""
return json_response("payment_source.json")
def promotional_credit_response() -> HttpResponse:
"""Promotional credit stream response."""
return json_response("promotional_credit.json")
def subscription_response_multiple() -> HttpResponse:
"""Subscription stream response with multiple records."""
return json_response("subscription_multiple.json")
def subscription_with_scheduled_changes_response() -> HttpResponse:
"""Subscription with scheduled changes stream response."""
return json_response("subscription_with_scheduled_changes.json")
def unbilled_charge_response() -> HttpResponse:
"""Unbilled charge stream response."""
return json_response("unbilled_charge.json")
def virtual_bank_account_response() -> HttpResponse:
"""Virtual bank account stream response."""
return json_response("virtual_bank_account.json")
def quote_response() -> HttpResponse:
"""Quote stream response."""
return json_response("quote.json")
def quote_response_multiple() -> HttpResponse:
"""Quote stream response with multiple records."""
return json_response("quote_multiple.json")
def quote_line_group_response() -> HttpResponse:
"""Quote line group stream response."""
return json_response("quote_line_group.json")
def site_migration_detail_response() -> HttpResponse:
"""Site migration detail stream response."""
return json_response("site_migration_detail.json")
def comment_response() -> HttpResponse:
"""Comment stream response."""
return json_response("comment.json")
def item_family_response() -> HttpResponse:
"""Item family stream response."""
return json_response("item_family.json")
def differential_price_response() -> HttpResponse:
"""Differential price stream response."""
return json_response("differential_price.json")
def error_no_scheduled_changes_response() -> HttpResponse:
"""Response for 'No changes are scheduled for this subscription' error (IGNORE action)."""
return json_response("error_no_scheduled_changes.json", HTTPStatus.BAD_REQUEST)
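# Hedged usage sketch (mirrors the stream tests in this directory): each helper
# pairs with a RequestBuilder request inside an HttpMocker-decorated test, e.g.
#
#     http_mocker.get(
#         RequestBuilder.addons_endpoint().with_any_query_params().build(),
#         addon_response(),
#     )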

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import addon_response, configuration_incompatible_response
from .utils import config, read_output
_STREAM_NAME = "addon"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestAddonStream(TestCase):
"""Tests for the addon stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for addon stream."""
http_mocker.get(
RequestBuilder.addons_endpoint().with_any_query_params().build(),
addon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "addon_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.addons_endpoint().with_any_query_params().build(),
addon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# Check response file for the actual timestamp value!
assert latest_cursor_value == 1705312800 # From addon.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.addons_endpoint().with_any_query_params().build(),
addon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
3. State advances to latest record's cursor value
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
http_mocker.get(
RequestBuilder.addons_endpoint()
.with_sort_by_asc("updated_at")
.with_include_deleted("true")
.with_updated_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
addon_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "addon_001"
assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for addon stream as configured in manifest."""
http_mocker.get(
RequestBuilder.addons_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")

View File

@@ -0,0 +1,120 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import (
attached_item_response,
configuration_incompatible_response,
item_response,
item_response_multiple,
)
from .utils import config, read_output
_STREAM_NAME = "attached_item"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestAttachedItemStream(TestCase):
"""Tests for the attached_item stream (substream of item)."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for attached_item stream (substream of item)."""
http_mocker.get(
RequestBuilder.items_endpoint().with_any_query_params().build(),
item_response(),
)
http_mocker.get(
RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
attached_item_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "attached_001"
@HttpMocker()
def test_with_multiple_parents(self, http_mocker: HttpMocker) -> None:
"""Test attached_item substream with multiple parent items."""
http_mocker.get(
RequestBuilder.items_endpoint().with_any_query_params().build(),
item_response_multiple(),
)
http_mocker.get(
RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
attached_item_response(),
)
http_mocker.get(
RequestBuilder.item_attached_items_endpoint("item_002").with_any_query_params().build(),
attached_item_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 2
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
# Mock parent item stream
http_mocker.get(
RequestBuilder.items_endpoint().with_any_query_params().build(),
item_response(),
)
# Mock attached_item substream (with cf_ fields)
http_mocker.get(
RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
attached_item_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for attached_item stream as configured in manifest."""
# Mock parent stream (item) to return successfully
http_mocker.get(
RequestBuilder.items_endpoint().with_any_query_params().build(),
item_response(),
)
# Mock attached_item substream to return CONFIG_INCOMPATIBLE
http_mocker.get(
RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import comment_response, configuration_incompatible_response
from .utils import config, read_output
_STREAM_NAME = "comment"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCommentStream(TestCase):
"""Tests for the comment stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for comment stream."""
http_mocker.get(
RequestBuilder.comments_endpoint().with_any_query_params().build(),
comment_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "comment_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.comments_endpoint().with_any_query_params().build(),
comment_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value (comment uses created_at)
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["created_at"])
# Check response file for the actual timestamp value!
assert latest_cursor_value == 1705312800 # From comment.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.comments_endpoint().with_any_query_params().build(),
comment_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (sort_by[asc]=created_at, created_at[between])
3. State advances to latest record's cursor value
Note: comment stream uses created_at cursor (not updated_at) and has NO include_deleted.
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"created_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
http_mocker.get(
RequestBuilder.comments_endpoint()
.with_sort_by_asc("created_at")
.with_created_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
comment_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "comment_001"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["created_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for comment stream as configured in manifest."""
http_mocker.get(
RequestBuilder.comments_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")

View File

@@ -0,0 +1,121 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import (
configuration_incompatible_response,
contact_response,
customer_response,
customer_response_multiple,
)
from .utils import config, read_output
_STREAM_NAME = "contact"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestContactStream(TestCase):
"""Tests for the contact stream (substream of customer)."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for contact stream (substream of customer)."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
contact_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "contact_001"
@HttpMocker()
def test_with_multiple_parents(self, http_mocker: HttpMocker) -> None:
"""Test contact substream with multiple parent customers."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response_multiple(),
)
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
contact_response(),
)
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_002").with_any_query_params().build(),
contact_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 2
@HttpMocker()
def test_both_transformations(self, http_mocker: HttpMocker) -> None:
"""
Test that BOTH transformations work together:
1. AddFields adds customer_id from parent stream slice
2. CustomFieldTransformation converts cf_* fields to custom_fields array
"""
# Mock parent customer stream
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
# Mock contact substream (with cf_ fields)
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
contact_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
record_data = output.records[0].record.data
# ========== Test Transformation #1: AddFields ==========
assert "customer_id" in record_data, "AddFields transformation should add customer_id field"
assert record_data["customer_id"] == "cust_001", "customer_id should match parent stream's id"
# ========== Test Transformation #2: CustomFieldTransformation ==========
assert not any(key.startswith("cf_") for key in record_data.keys()), "cf_ fields should be removed from top level"
assert "custom_fields" in record_data
assert isinstance(record_data["custom_fields"], list)
assert len(record_data["custom_fields"]) == 2
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for contact stream as configured in manifest."""
# Mock parent stream (customer) to return successfully
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
# Mock contact substream to return CONFIG_INCOMPATIBLE
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
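
Note on the transformation assertions in this file: the connector's CustomFieldTransformation component is not part of this diff, so the following is only a minimal Python sketch of the behavior the tests assert (cf_* keys folded into a custom_fields array). The function name, field names, and whether the cf_ prefix is kept in the "name" entries are all illustrative assumptions, not the connector's actual implementation.

# Illustrative sketch only, not the connector's CustomFieldTransformation.
from typing import Any, Dict


def fold_custom_fields(record: Dict[str, Any]) -> Dict[str, Any]:
    """Move every cf_* key into a custom_fields array, as the tests assert."""
    custom_fields = [
        {"name": key, "value": value}
        for key, value in record.items()
        if key.startswith("cf_")
    ]
    cleaned = {key: value for key, value in record.items() if not key.startswith("cf_")}
    if custom_fields:
        cleaned["custom_fields"] = custom_fields
    return cleaned


# Mirrors the assertions above: two cf_ fields become two custom_fields entries.
sample = {"id": "contact_001", "customer_id": "cust_001", "cf_tier": "gold", "cf_region": "emea"}
transformed = fold_custom_fields(sample)
assert not any(key.startswith("cf_") for key in transformed)
assert len(transformed["custom_fields"]) == 2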

View File

@@ -0,0 +1,153 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, coupon_response
from .utils import config, read_output
_STREAM_NAME = "coupon"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCouponStream(TestCase):
"""Tests for the coupon stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for coupon stream."""
http_mocker.get(
RequestBuilder.coupons_endpoint().with_any_query_params().build(),
coupon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "coupon_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.coupons_endpoint().with_any_query_params().build(),
coupon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
        # Expected cursor value is the updated_at timestamp in the mocked response fixture
assert latest_cursor_value == 1705312800 # From coupon.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.coupons_endpoint().with_any_query_params().build(),
coupon_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (only updated_at[between] - NO sort_by or include_deleted)
3. State advances to latest record's cursor value
Note: coupon stream uses updated_at cursor but has NO sort_by or include_deleted parameters.
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
# Note: Coupon stream does NOT use sort_by or include_deleted
http_mocker.get(
RequestBuilder.coupons_endpoint()
.with_updated_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
coupon_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "coupon_001"
assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for coupon stream as configured in manifest."""
http_mocker.get(
RequestBuilder.coupons_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
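
The updated_at[between] bounds used in the incremental tests above follow directly from the prior state (2024-01-01T00:00:00Z) and the frozen clock (2024-01-15T12:00:00Z). A quick sanity check of the epoch values, not connector code:

from datetime import datetime, timezone

# Lower bound: previous state (2024-01-01T00:00:00Z).
assert int(datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc).timestamp()) == 1704067200
# Upper bound: frozen "now" (2024-01-15T12:00:00Z).
assert int(datetime(2024, 1, 15, 12, 0, tzinfo=timezone.utc).timestamp()) == 1705320000
# Cursor emitted by the mocked record (2024-01-15T10:00:00Z, per the coupon.json fixture).
assert int(datetime(2024, 1, 15, 10, 0, tzinfo=timezone.utc).timestamp()) == 1705312800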

View File

@@ -0,0 +1,154 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, credit_note_response
from .utils import config, read_output
_STREAM_NAME = "credit_note"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCreditNoteStream(TestCase):
"""Tests for the credit_note stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for credit_note stream."""
http_mocker.get(
RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
credit_note_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "cn_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
credit_note_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
        # Expected cursor value is the updated_at timestamp in the mocked response fixture
assert latest_cursor_value == 1705312800 # From credit_note.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
credit_note_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (sort_by[asc]=date, include_deleted, updated_at[between])
3. State advances to latest record's cursor value
Note: credit_note stream uses updated_at cursor but sorts by "date" (not "updated_at").
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
http_mocker.get(
RequestBuilder.credit_notes_endpoint()
.with_sort_by_asc("date")
.with_include_deleted("true")
.with_updated_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
credit_note_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "cn_001"
assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for credit_note stream as configured in manifest."""
http_mocker.get(
RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
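
The request_builder module used by every test in this diff is not shown here. The sketch below is a hypothetical reconstruction of the fluent with_* pattern these tests rely on; the class name, resource path, base URL, parameter encoding (e.g. the updated_at[between] window), and HttpRequest usage are assumptions for illustration, not the connector's actual builder.

# Hypothetical sketch of the fluent builder pattern; the real request_builder.py
# may differ in names, base URL, defaults, and parameter encoding.
from typing import Dict

from airbyte_cdk.test.mock_http import HttpRequest


class SketchRequestBuilder:
    def __init__(self, resource: str) -> None:
        self._resource = resource
        self._params: Dict[str, str] = {}

    @classmethod
    def credit_notes_endpoint(cls) -> "SketchRequestBuilder":
        return cls("credit_notes")

    def with_sort_by_asc(self, field: str) -> "SketchRequestBuilder":
        self._params["sort_by[asc]"] = field
        return self

    def with_include_deleted(self, value: str) -> "SketchRequestBuilder":
        self._params["include_deleted"] = value
        return self

    def with_updated_at_between(self, start: int, end: int) -> "SketchRequestBuilder":
        # The exact encoding of the between window is an assumption.
        self._params["updated_at[between]"] = f"[{start},{end}]"
        return self

    def with_limit(self, limit: int) -> "SketchRequestBuilder":
        self._params["limit"] = str(limit)
        return self

    def build(self) -> HttpRequest:
        # Placeholder host; the real builder would derive the site from config.
        return HttpRequest(
            url=f"https://example-site.example.com/api/v2/{self._resource}",
            query_params=self._params,
        )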

View File

@@ -0,0 +1,182 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import (
customer_response,
customer_response_page1,
customer_response_page2,
)
from .utils import config, read_output
_STREAM_NAME = "customer"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCustomerStream(TestCase):
"""Tests for the customer stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for customer stream."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "cust_001"
@HttpMocker()
def test_pagination_two_pages(self, http_mocker: HttpMocker) -> None:
"""
Test pagination with 2 pages for customer stream.
IMPORTANT: Verified in manifest.yaml - all 27 streams use identical pagination:
- Type: DefaultPaginator
- Strategy: CursorPagination with next_offset
- Page Size: 100
- Stop Condition: when response has no next_offset
This single test validates pagination behavior for ALL 27 streams:
Standard streams (23): addon, comment, coupon, credit_note, customer,
differential_price, event, gift, hosted_page, invoice, item, item_family,
item_price, order, payment_source, plan, promotional_credit, quote,
site_migration_detail, subscription, transaction, unbilled_charge,
virtual_bank_account
Substreams (4): attached_item, contact, quote_line_group,
subscription_with_scheduled_changes
Test validates:
1. Page 1 response includes next_offset -> connector fetches page 2
2. Page 2 response has no next_offset -> pagination stops
3. All records from both pages are returned (2 records total)
"""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
[
customer_response_page1(),
customer_response_page2(),
],
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 2
record_ids = [r.record.data["id"] for r in output.records]
assert "cust_001" in record_ids
assert "cust_002" in record_ids
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
        # Expected cursor value is the updated_at timestamp in the mocked response fixture
assert latest_cursor_value == 1705312800 # From customer.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
3. State advances to latest record's cursor value
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
http_mocker.get(
RequestBuilder.customers_endpoint()
.with_sort_by_asc("updated_at")
.with_include_deleted("true")
.with_updated_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
customer_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "cust_001"
assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, differential_price_response
from .utils import config, read_output
_STREAM_NAME = "differential_price"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestDifferentialPriceStream(TestCase):
"""Tests for the differential_price stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for differential_price stream."""
http_mocker.get(
RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
differential_price_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "dp_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
differential_price_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
        # Expected cursor value is the updated_at timestamp in the mocked response fixture
assert latest_cursor_value == 1705312800 # From differential_price.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
differential_price_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
3. State advances to latest record's cursor value
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
http_mocker.get(
RequestBuilder.differential_prices_endpoint()
.with_sort_by_asc("updated_at")
.with_include_deleted("true")
.with_updated_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
differential_price_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "dp_001"
assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["updated_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for differential_price stream as configured in manifest."""
http_mocker.get(
RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")

View File

@@ -0,0 +1,64 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from http import HTTPStatus
from unittest import TestCase
import freezegun
from airbyte_cdk.test.mock_http import HttpMocker
from .request_builder import RequestBuilder
from .response_builder import (
configuration_incompatible_response,
customer_response,
empty_response,
error_response,
)
from .utils import config, read_output
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestErrorHandling(TestCase):
"""Tests for error handling."""
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored as configured in manifest."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name="customer")
assert len(output.records) == 0
@HttpMocker()
def test_contact_404_ignored(self, http_mocker: HttpMocker) -> None:
"""Test 404 error is ignored for contact stream as configured in manifest."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
customer_response(),
)
http_mocker.get(
RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
error_response(HTTPStatus.NOT_FOUND),
)
output = read_output(config_builder=config(), stream_name="contact")
assert len(output.records) == 0
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestEmptyResponse(TestCase):
"""Tests for empty response handling."""
@HttpMocker()
def test_empty_response(self, http_mocker: HttpMocker) -> None:
"""Test handling of empty response."""
http_mocker.get(
RequestBuilder.customers_endpoint().with_any_query_params().build(),
empty_response(),
)
output = read_output(config_builder=config(), stream_name="customer")
assert len(output.records) == 0
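
The response_builder helpers are likewise outside this excerpt. Below is a hedged sketch of what configuration_incompatible_response() and error_response() might return, using the CDK's HttpResponse; the body shape and status code for the configuration_incompatible case are assumptions, since the tests only require an error that the manifest is configured to ignore.

# Illustrative only: the real response_builder.py may load JSON fixture files
# instead of building bodies inline.
import json
from http import HTTPStatus

from airbyte_cdk.test.mock_http import HttpResponse


def configuration_incompatible_response() -> HttpResponse:
    # Assumed error body and 400 status; only the "ignored error" behavior matters to the tests.
    body = {"api_error_code": "configuration_incompatible", "message": "..."}
    return HttpResponse(body=json.dumps(body), status_code=HTTPStatus.BAD_REQUEST)


def error_response(status: HTTPStatus) -> HttpResponse:
    return HttpResponse(body=json.dumps({"message": "error"}), status_code=int(status))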

View File

@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
from unittest import TestCase
import freezegun
from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder
from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, event_response
from .utils import config, read_output
_STREAM_NAME = "event"
@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestEventStream(TestCase):
"""Tests for the event stream."""
@HttpMocker()
def test_read_records(self, http_mocker: HttpMocker) -> None:
"""Basic read test for event stream."""
http_mocker.get(
RequestBuilder.events_endpoint().with_any_query_params().build(),
event_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
assert len(output.records) == 1
assert output.records[0].record.data["id"] == "ev_001"
@HttpMocker()
def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
"""Test that incremental sync emits state message."""
http_mocker.get(
RequestBuilder.events_endpoint().with_any_query_params().build(),
event_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
# Verify exactly 1 record returned
assert len(output.records) == 1
# Verify state message was emitted
assert len(output.state_messages) > 0
# Verify state contains correct cursor value (event uses occurred_at)
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["occurred_at"])
        # Expected cursor value is the occurred_at timestamp in the mocked response fixture
assert latest_cursor_value == 1705312800 # From event.json
@HttpMocker()
def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
"""Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
http_mocker.get(
RequestBuilder.events_endpoint().with_any_query_params().build(),
event_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Assert record exists
assert len(output.records) == 1
record_data = output.records[0].record.data
# Assert cf_ fields are REMOVED from top level
assert not any(
key.startswith("cf_") for key in record_data.keys()
), "cf_ fields should be removed from record and moved to custom_fields array"
# Assert custom_fields array EXISTS
assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
assert isinstance(record_data["custom_fields"], list)
# Assert custom_fields array contains the transformed fields
assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
# Verify structure and values of custom_fields items
custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
@HttpMocker()
def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
"""
Test incremental sync with prior state and validate request parameters.
This test validates:
1. State from previous sync is accepted
2. Correct request parameters are sent (occurred_at[between] - NO sort_by or include_deleted for event stream)
3. State advances to latest record's cursor value
"""
# ARRANGE: Previous state from last sync
previous_state_timestamp = 1704067200 # 2024-01-01T00:00:00
state = StateBuilder().with_stream_state(_STREAM_NAME, {"occurred_at": previous_state_timestamp}).build()
# Mock API response with record AFTER the state timestamp
# Note: Event stream uses sort_by[asc]=occurred_at and occurred_at[between], but NO include_deleted
http_mocker.get(
RequestBuilder.events_endpoint()
.with_sort_by_asc("occurred_at")
.with_occurred_at_between(previous_state_timestamp, 1705320000) # Frozen time: 2024-01-15T12:00:00Z
.with_limit(100)
.build(),
event_response(),
)
# ACT: Run incremental sync with state
output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
# ASSERT: Records returned
assert len(output.records) == 1, "Should return exactly 1 record"
record = output.records[0].record.data
# ASSERT: Record data is correct
assert record["id"] == "ev_001"
assert record["occurred_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
# ASSERT: State message emitted
assert len(output.state_messages) > 0, "Should emit state messages"
# ASSERT: State advances to latest record
latest_state = output.state_messages[-1].state.stream.stream_state
latest_cursor_value = int(latest_state.__dict__["occurred_at"])
# State should advance beyond previous state
assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
# State should match the latest record's cursor value
assert (
latest_cursor_value == 1705312800
), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
@HttpMocker()
def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
"""Test configuration_incompatible error is ignored for event stream as configured in manifest."""
http_mocker.get(
RequestBuilder.events_endpoint().with_any_query_params().build(),
configuration_incompatible_response(),
)
output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
# Verify no records returned (error was ignored)
assert len(output.records) == 0
# Verify error message from manifest is logged
assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
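
All test classes in this diff pin the clock to 2024-01-15T12:00:00Z with freezegun, which is what makes the upper bound of the occurred_at[between] and updated_at[between] windows deterministic. A minimal standalone illustration of that pinning:

from datetime import datetime, timezone

import freezegun


@freezegun.freeze_time("2024-01-15T12:00:00Z")
def frozen_upper_bound() -> int:
    # Under freeze_time, "now" is fixed, so the request window's upper bound is stable.
    return int(datetime.now(timezone.utc).timestamp())


assert frozen_upper_bound() == 1705320000  # matches the upper bound mocked in the tests above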

Some files were not shown because too many files have changed in this diff.