Merge branch 'master' into devin/1765229017-fix-klaviyo-profile-subscriptions
@@ -139,8 +139,8 @@ runs:
       CONNECTOR_VERSION_TAG="${{ inputs.tag-override }}"
       echo "🏷 Using provided tag override: $CONNECTOR_VERSION_TAG"
     elif [[ "${{ inputs.release-type }}" == "pre-release" ]]; then
-      hash=$(git rev-parse --short=10 HEAD)
-      CONNECTOR_VERSION_TAG="${CONNECTOR_VERSION}-dev.${hash}"
+      hash=$(git rev-parse --short=7 HEAD)
+      CONNECTOR_VERSION_TAG="${CONNECTOR_VERSION}-preview.${hash}"
       echo "🏷 Using pre-release tag: $CONNECTOR_VERSION_TAG"
     else
       CONNECTOR_VERSION_TAG="$CONNECTOR_VERSION"
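The tag format change above is the crux of this hunk: pre-release builds switch from `{version}-dev.{10-char-sha}` to `{version}-preview.{7-char-sha}`. A minimal Kotlin sketch of the resulting string, for illustration only (the helper and sample values below are not part of the action):

```kotlin
// Illustrative only: mirrors the shell logic in the hunk above.
// A 7-character short SHA is appended after a "-preview." marker.
fun previewTag(connectorVersion: String, fullSha: String): String {
    val shortSha = fullSha.take(7) // analogous to `git rev-parse --short=7 HEAD`
    return "$connectorVersion-preview.$shortSha"
}

fun main() {
    // Hypothetical inputs, shown only to make the format concrete.
    println(previewTag("2.16.10", "a1b2c3d4e5f6a7b8c9d0")) // prints: 2.16.10-preview.a1b2c3d
}
```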
.github/pr-welcome-community.md (2 changes)
@@ -21,7 +21,7 @@ As needed or by request, Airbyte Maintainers can execute the following slash com
 - `/run-live-tests` - Runs live tests for the modified connector(s).
 - `/run-regression-tests` - Runs regression tests for the modified connector(s).
 - `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
-- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.
+- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-preview.{git-sha}`) for all modified connectors in the PR.
 
 If you have any questions, feel free to ask in the PR comments or join our [Slack community](https://airbytehq.slack.com/).
 
.github/pr-welcome-internal.md (9 changes)
@@ -21,11 +21,18 @@ Airbyte Maintainers (that's you!) can execute the following slash commands on yo
 - `/bump-version` - Bumps connector versions.
   - You can specify a custom changelog by passing `changelog`. Example: `/bump-version changelog="My cool update"`
   - Leaving the changelog arg blank will auto-populate the changelog from the PR title.
+- `/bump-progressive-rollout-version` - Bumps connector version with an RC suffix for progressive rollouts.
+  - Creates a release candidate version (e.g., `2.16.10-rc.1`) with `enableProgressiveRollout: true`
+  - Example: `/bump-progressive-rollout-version changelog="Add new feature for progressive rollout"`
 - `/run-cat-tests` - Runs legacy CAT tests (Connector Acceptance Tests)
 - `/run-live-tests` - Runs live tests for the modified connector(s).
 - `/run-regression-tests` - Runs regression tests for the modified connector(s).
 - `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
-- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.
+- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-preview.{git-sha}`) for all modified connectors in the PR.
+- Connector release lifecycle (AI-powered):
+  - `/ai-prove-fix` - Runs prerelease readiness checks, including testing against customer connections.
+  - `/ai-canary-prerelease` - Rolls out prerelease to 5-10 connections for canary testing.
+  - `/ai-release-watch` - Monitors rollout post-release and tracks sync success rates.
 - JVM connectors:
   - `/update-connector-cdk-version connector=<CONNECTOR_NAME>` - Updates the specified connector to the latest CDK version.
     Example: `/update-connector-cdk-version connector=destination-bigquery`
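For illustration, the RC naming that `/bump-progressive-rollout-version` documents above can be sketched as a trivial helper. This is hypothetical; the real bump is performed by airbyte-ci's `bump-version ... --rc`, which also sets `enableProgressiveRollout` in the connector metadata:

```kotlin
// Hypothetical sketch of the RC suffix used for progressive rollouts.
fun withRcSuffix(version: String, rcNumber: Int = 1): String = "$version-rc.$rcNumber"

fun main() {
    println(withRcSuffix("2.16.10")) // prints: 2.16.10-rc.1
}
```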
.github/workflows/ai-canary-prerelease-command.yml (new file, 72 lines)
@@ -0,0 +1,72 @@
name: AI Canary Prerelease Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Canary Prerelease for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-canary-prerelease:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ steps.get-app-token.outputs.token }}
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Canary Prerelease Started**
            >
            > Rolling out to 5-10 connections, watching results, and reporting findings.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Canary Prerelease
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!canary_prerelease"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "🐤 **AI Canary Prerelease session starting...** Rolling out to 5-10 connections, watching results, and reporting findings. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/canary_prerelease.md)"
          tags: |
            ai-oncall
.github/workflows/ai-prove-fix-command.yml (new file, 72 lines)
@@ -0,0 +1,72 @@
name: AI Prove Fix Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Prove Fix for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-prove-fix:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ steps.get-app-token.outputs.token }}
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Prove Fix Started**
            >
            > Running readiness checks and testing against customer connections.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Prove Fix
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!prove_fix"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "🔍 **AI Prove Fix session starting...** Running readiness checks and testing against customer connections. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/prove_fix.md)"
          tags: |
            ai-oncall
.github/workflows/ai-release-watch-command.yml (new file, 72 lines)
@@ -0,0 +1,72 @@
name: AI Release Watch Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Release Watch for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-release-watch:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ steps.get-app-token.outputs.token }}
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Release Watch Started**
            >
            > Monitoring rollout and tracking sync success rates.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Release Watch
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!release_watch"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "👁️ **AI Release Watch session starting...** Monitoring rollout and tracking sync success rates. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/release_watch.md)"
          tags: |
            ai-oncall
.github/workflows/bump-progressive-rollout-version-command.yml (new file, 178 lines)
@@ -0,0 +1,178 @@
name: Bump connector version for progressive rollout

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number. This PR will be referenced in the changelog line."
        type: number
        required: false
      comment-id:
        description: "Optional. The comment-id of the slash command. Used to update the comment with the status."
        required: false

      type:
        description: "The type of bump to perform. One of 'major', 'minor', or 'patch'."
        required: false
        default: "patch"

      changelog:
        description: "Optional. The comment to add to the changelog. If not provided, the PR title will be used."
        required: false
        default: ""

      # These must be declared, but they are unused and ignored.
      # TODO: Infer 'repo' and 'gitref' from PR number on other workflows, so we can remove these.
      repo:
        description: "Repo (Ignored)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Ref (Ignored)"
        required: false

run-name: "Bump connector version for progressive rollout in PR: #${{ github.event.inputs.pr }}"
concurrency:
  group: ${{ github.workflow }}-${{ github.event.inputs.pr }}
  # Cancel any previous runs on the same branch if they are still in progress
  cancel-in-progress: true

jobs:
  bump-progressive-rollout-version:
    name: "Bump version of connectors for progressive rollout in this PR"
    runs-on: ubuntu-24.04
    steps:
      - name: Get job variables
        id: job-vars
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        shell: bash
        run: |
          PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
          echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT
          echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT
          echo "pr_title=$(echo "$PR_JSON" | jq -r .title)" >> $GITHUB_OUTPUT
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      # NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
      # permissions to add commits to our main repo as well as forks. This will only work on
      # forks if the user installs the app into their fork. Until we document this as a clear
      # path, we will have to keep using the PAT.
      - name: Checkout Airbyte
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          repository: ${{ steps.job-vars.outputs.repo }}
          ref: ${{ steps.job-vars.outputs.branch }}
          fetch-depth: 1
          # Important that token is a PAT so that CI checks are triggered again.
          # Without this we would be forever waiting on required checks to pass.
          token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}

      - name: Append comment with job run link
        # If comment-id is not provided, this will create a new
        # comment with the job run link.
        id: first-comment-action
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        with:
          comment-id: ${{ github.event.inputs.comment-id }}
          issue-number: ${{ github.event.inputs.pr }}
          body: |

            > **Progressive Rollout Version Bump Started**
            >
            > This will bump the connector version with an RC suffix and enable progressive rollout.
            > [Check job output.][1]

            [1]: ${{ steps.job-vars.outputs.run-url }}

      - name: Log changelog source
        run: |
          if [ -n "${{ github.event.inputs.changelog }}" ]; then
            echo "Using user-provided changelog: ${{ github.event.inputs.changelog }}"
          else
            echo "Using PR title as changelog: ${{ steps.job-vars.outputs.pr_title }}"
          fi

      - name: Run airbyte-ci connectors --modified bump-version with --rc flag
        uses: ./.github/actions/run-airbyte-ci
        continue-on-error: true
        with:
          context: "manual"
          gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
          sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
          github_token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
          git_repo_url: https://github.com/${{ steps.job-vars.outputs.repo }}.git
          subcommand: |
            connectors --modified bump-version \
              ${{ github.event.inputs.type }} \
              "${{ github.event.inputs.changelog != '' && github.event.inputs.changelog || steps.job-vars.outputs.pr_title }}" \
              --pr-number ${{ github.event.inputs.pr }} \
              --rc

      # This is helpful in the case that we change a previously committed generated file to be ignored by git.
      - name: Remove any files that have been gitignored
        run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached

      # Check for changes in git
      - name: Check for changes
        id: git-diff
        run: |
          git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT
        shell: bash

      # Commit changes (if any)
      - name: Commit changes
        id: commit-step
        if: steps.git-diff.outputs.changes == 'true'
        run: |
          git config --global user.name "Octavia Squidington III"
          git config --global user.email "octavia-squidington-iii@users.noreply.github.com"
          git add .
          git commit -m "chore: bump-version for progressive rollout"
          echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT

      - name: Push changes to '(${{ steps.job-vars.outputs.repo }})'
        if: steps.git-diff.outputs.changes == 'true'
        run: |
          git remote add contributor https://github.com/${{ steps.job-vars.outputs.repo }}.git
          git push contributor HEAD:'${{ steps.job-vars.outputs.branch }}'

      - name: Append success comment
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: steps.git-diff.outputs.changes == 'true'
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: hooray
          body: |
            > **Progressive Rollout Version Bump: SUCCESS**
            >
            > The connector version has been bumped with an RC suffix (e.g., `X.Y.Z-rc.1`).
            > Changes applied successfully. (${{ steps.commit-step.outputs.sha }})
            >
            > **Next steps:**
            > 1. Merge this PR to publish the RC version
            > 2. Monitor the progressive rollout in production
            > 3. When ready to promote, use the `finalize_rollout` workflow with `action=promote`
            > 4. If issues arise, use `action=rollback` instead

      - name: Append success comment (no-op)
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: steps.git-diff.outputs.changes != 'true'
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: "-1"
          body: |
            > Job completed successfully (no changes detected).
            >
            > This might happen if:
            > - The connector already has an RC version
            > - No modified connectors were detected in this PR

      - name: Append failure comment
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: failure()
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: confused
          body: |
            > Job failed. Check the [workflow logs](${{ steps.job-vars.outputs.run-url }}) for details.
.github/workflows/label-community-prs.yml (new file, 28 lines)
@@ -0,0 +1,28 @@
name: Label Community PRs

# This workflow automatically adds the "community" label to PRs from forks.
# This enables automatic tracking on the Community PRs project board.

on:
  pull_request_target:
    types:
      - opened
      - reopened

jobs:
  label-community-pr:
    name: Add "Community" Label to PR
    # Only run for PRs from forks
    if: github.event.pull_request.head.repo.fork == true
    runs-on: ubuntu-24.04
    permissions:
      issues: write
      pull-requests: write
    steps:
      - name: Add community label
        # This action uses GitHub's addLabels API, which is idempotent.
        # If the label already exists, the API call succeeds without error.
        uses: actions-ecosystem/action-add-labels@bd52874380e3909a1ac983768df6976535ece7f8 # v1.1.3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          labels: community
@@ -3,7 +3,7 @@ name: Publish Connectors Pre-release
 # It can be triggered via the /publish-connectors-prerelease slash command from PR comments,
 # or via the MCP tool `publish_connector_to_airbyte_registry`.
 #
-# Pre-release versions are tagged with the format: {version}-dev.{10-char-git-sha}
+# Pre-release versions are tagged with the format: {version}-preview.{7-char-git-sha}
 # These versions are NOT eligible for semver auto-advancement but ARE available
 # for version pinning via the scoped_configuration API.
 #
@@ -66,7 +66,7 @@ jobs:
       - name: Get short SHA
        id: get-sha
        run: |
-          SHORT_SHA=$(git rev-parse --short=10 HEAD)
+          SHORT_SHA=$(git rev-parse --short=7 HEAD)
          echo "short-sha=$SHORT_SHA" >> $GITHUB_OUTPUT
 
      - name: Get job variables
@@ -135,7 +135,7 @@ jobs:
            > Publishing pre-release build for connector `${{ steps.resolve-connector.outputs.connector-name }}`.
            > Branch: `${{ inputs.gitref }}`
            >
-           > Pre-release versions will be tagged as `{version}-dev.${{ steps.get-sha.outputs.short-sha }}`
+           > Pre-release versions will be tagged as `{version}-preview.${{ steps.get-sha.outputs.short-sha }}`
            > and are available for version pinning via the scoped_configuration API.
            >
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})
@@ -147,6 +147,7 @@ jobs:
     with:
       connectors: ${{ format('--name={0}', needs.init.outputs.connector-name) }}
       release-type: pre-release
+      gitref: ${{ inputs.gitref }}
     secrets: inherit
 
   post-completion:
@@ -176,13 +177,12 @@ jobs:
        id: message-vars
        run: |
          CONNECTOR_NAME="${{ needs.init.outputs.connector-name }}"
-         SHORT_SHA="${{ needs.init.outputs.short-sha }}"
-         VERSION="${{ needs.init.outputs.connector-version }}"
+         # Use the actual docker-image-tag from the publish workflow output
+         DOCKER_TAG="${{ needs.publish.outputs.docker-image-tag }}"
 
-         if [[ -n "$VERSION" ]]; then
-           DOCKER_TAG="${VERSION}-dev.${SHORT_SHA}"
-         else
-           DOCKER_TAG="{version}-dev.${SHORT_SHA}"
+         if [[ -z "$DOCKER_TAG" ]]; then
+           echo "::error::docker-image-tag output is missing from publish workflow. This is unexpected."
+           exit 1
          fi
 
          echo "connector_name=$CONNECTOR_NAME" >> $GITHUB_OUTPUT
.github/workflows/publish_connectors.yml (19 changes)
@@ -21,6 +21,14 @@ on:
        required: false
        default: false
        type: boolean
+      gitref:
+        description: "Git ref (branch or SHA) to build connectors from. Used by pre-release workflow to build from PR branches."
+        required: false
+        type: string
+    outputs:
+      docker-image-tag:
+        description: "Docker image tag used when publishing. For single-connector callers only; multi-connector callers should not rely on this output."
+        value: ${{ jobs.publish_connector_registry_entries.outputs.docker-image-tag }}
   workflow_dispatch:
     inputs:
       connectors:
@@ -48,6 +56,7 @@ jobs:
        # v4
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
        with:
+          ref: ${{ inputs.gitref || '' }}
          fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
          submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
      - name: List connectors to publish [manual]
@@ -105,6 +114,7 @@ jobs:
        # v4
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
        with:
+          ref: ${{ inputs.gitref || '' }}
          fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
          submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
 
@@ -250,11 +260,14 @@ jobs:
      max-parallel: 5
      # Allow all jobs to run, even if one fails
      fail-fast: false
+    outputs:
+      docker-image-tag: ${{ steps.connector-metadata.outputs.docker-image-tag }}
    steps:
      - name: Checkout Airbyte
        # v4
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
        with:
+          ref: ${{ inputs.gitref || '' }}
          fetch-depth: 2 # Required so we can conduct a diff from the previous commit to understand what connectors have changed.
          submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
 
@@ -292,8 +305,8 @@ jobs:
          echo "connector-version=$(poe -qq get-version)" | tee -a $GITHUB_OUTPUT
          CONNECTOR_VERSION=$(poe -qq get-version)
          if [[ "${{ inputs.release-type }}" == "pre-release" ]]; then
-            hash=$(git rev-parse --short=10 HEAD)
-            echo "docker-image-tag=${CONNECTOR_VERSION}-dev.${hash}" | tee -a $GITHUB_OUTPUT
+            hash=$(git rev-parse --short=7 HEAD)
+            echo "docker-image-tag=${CONNECTOR_VERSION}-preview.${hash}" | tee -a $GITHUB_OUTPUT
            echo "release-type-flag=--pre-release" | tee -a $GITHUB_OUTPUT
          else
            echo "docker-image-tag=${CONNECTOR_VERSION}" | tee -a $GITHUB_OUTPUT
@@ -349,6 +362,7 @@ jobs:
        # v4
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
        with:
+          ref: ${{ inputs.gitref || '' }}
          submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
      - name: Match GitHub User to Slack User
        id: match-github-to-slack-user
@@ -381,6 +395,7 @@ jobs:
        # v4
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955
        with:
+          ref: ${{ inputs.gitref || '' }}
          submodules: true # Required for the enterprise repo since it uses a submodule that needs to exist for this workflow to run successfully.
      - name: Notify PagerDuty
        id: pager-duty
.github/workflows/slash-commands.yml (4 changes)
@@ -35,8 +35,12 @@ jobs:
          issue-type: both
 
          commands: |
+            ai-canary-prerelease
+            ai-prove-fix
+            ai-release-watch
            approve-regression-tests
            bump-bulk-cdk-version
+            bump-progressive-rollout-version
            bump-version
            build-connector-images
            connector-performance
.github/workflows/sync-ai-connector-docs.yml (new file, 70 lines)
@@ -0,0 +1,70 @@
name: Sync Agent Connector Docs

on:
  schedule:
    - cron: "0 */2 * * *" # Every 2 hours
  workflow_dispatch: # Manual trigger

jobs:
  sync-docs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout airbyte repo
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0

      - name: Checkout airbyte-agent-connectors
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          repository: airbytehq/airbyte-agent-connectors
          path: agent-connectors-source

      - name: Sync connector docs
        run: |
          DEST_DIR="docs/ai-agents/connectors"
          mkdir -p "$DEST_DIR"

          for connector_dir in agent-connectors-source/connectors/*/; do
            connector=$(basename "$connector_dir")

            # Only delete/recreate the specific connector subdirectory
            # This leaves any files directly in $DEST_DIR untouched
            rm -rf "$DEST_DIR/$connector"
            mkdir -p "$DEST_DIR/$connector"

            # Copy all markdown files for this connector
            for md_file in "$connector_dir"/*.md; do
              if [ -f "$md_file" ]; then
                cp "$md_file" "$DEST_DIR/$connector/"
              fi
            done
          done

          echo "Synced $(ls -d $DEST_DIR/*/ 2>/dev/null | wc -l) connectors"

      - name: Cleanup temporary checkout
        run: rm -rf agent-connectors-source

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Create PR if changes
        uses: peter-evans/create-pull-request@0979079bc20c05bbbb590a56c21c4e2b1d1f1bbe # v6
        with:
          token: ${{ steps.get-app-token.outputs.token }}
          commit-message: "docs: sync agent connector docs from airbyte-agent-connectors repo"
          branch: auto-sync-ai-connector-docs
          delete-branch: true
          title: "docs: sync agent connector docs from airbyte-agent-connectors repo"
          body: |
            Automated sync of agent connector docs from airbyte-agent-connectors.

            This PR was automatically created by the sync-agent-connector-docs workflow.
          labels: |
            documentation
            auto-merge
.markdownlintignore (new file, 3 lines)
@@ -0,0 +1,3 @@
# Ignore auto-generated connector documentation files synced from airbyte-agent-connectors repo
# These files are generated and have formatting that doesn't conform to markdownlint rules
docs/ai-agents/connectors/**
@@ -1,3 +1,34 @@
+## Version 0.1.91
+
+load cdk: upsert records test uses proper target schema
+
+## Version 0.1.90
+
+load cdk: components tests: data coercion tests cover all data types
+
+## Version 0.1.89
+
+load cdk: components tests: data coercion tests for int+number
+
+## Version 0.1.88
+
+**Load CDK**
+
+* Add CDC_CURSOR_COLUMN_NAME constant.
+
+## Version 0.1.87
+
+**Load CDK**
+
+* Properly call NamespaceMapper before calculating final table names.
+
+## Version 0.1.86
+
+**Load CDK**
+
+* Adds toFinalSchema "escape hatch" for final table schema munging
+* Refactored Component test fixtures to require explicit StreamTableSchema creation using TableSchemaFactory
+
 ## Version 0.1.85
 
 **Extract CDK**
@@ -104,17 +104,22 @@ class DefaultDestinationCatalogFactory {
         catalog: ConfiguredAirbyteCatalog,
         streamFactory: DestinationStreamFactory,
         tableNameResolver: TableNameResolver,
+        namespaceMapper: NamespaceMapper,
     ): DestinationCatalog {
-        val descriptors =
-            catalog.streams
-                .map { DestinationStream.Descriptor(it.stream.namespace, it.stream.name) }
-                .toSet()
-        val names = tableNameResolver.getTableNameMapping(descriptors)
+        // we resolve the table names with the properly mapped descriptors
+        val mappedDescriptors =
+            catalog.streams.map { namespaceMapper.map(it.stream.namespace, it.stream.name) }.toSet()
+        val names = tableNameResolver.getTableNameMapping(mappedDescriptors)
+
+        require(
+            names.size == catalog.streams.size,
+            { "Invariant violation: An incomplete table name mapping was generated." }
+        )
+
         return DestinationCatalog(
             streams =
                 catalog.streams.map {
-                    val key = DestinationStream.Descriptor(it.stream.namespace, it.stream.name)
+                    val key = namespaceMapper.map(it.stream.namespace, it.stream.name)
                     streamFactory.make(it, names[key]!!)
                 }
         )
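To see why the descriptors must be mapped before resolving table names, consider a small sketch in which a namespace mapper rewrites source namespaces: if table names were resolved from the raw descriptors but looked up with mapped keys (or vice versa), entries would go missing, which is exactly what the new `require(...)` invariant guards against. The types and mapping below are stand-ins for illustration, not the CDK's `NamespaceMapper` API:

```kotlin
// Stand-in types for illustration; the real CDK classes (NamespaceMapper,
// DestinationStream.Descriptor, TableNameResolver) have richer APIs.
data class Descriptor(val namespace: String?, val name: String)

// Hypothetical namespace mapping, similar in spirit to namespaceMapper.map(...).
fun map(namespace: String?, name: String) = Descriptor("dest_${namespace ?: "default"}", name)

fun main() {
    val raw = listOf(Descriptor("public", "users"), Descriptor(null, "orders"))
    // Resolve table names from the *mapped* descriptors...
    val names = raw.map { map(it.namespace, it.name) }.associateWith { "${it.namespace}.${it.name}" }
    // ...and look them up with the same mapped keys, so every stream finds its table name.
    raw.forEach { d -> println(names.getValue(map(d.namespace, d.name))) }
}
```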
@@ -42,10 +42,13 @@ class TableSchemaFactory(
             finalSchema = finalSchema,
         )
 
-        return StreamTableSchema(
-            tableNames,
-            columnSchema,
-            importType,
-        )
+        val tableSchema =
+            StreamTableSchema(
+                tableNames,
+                columnSchema,
+                importType,
+            )
+
+        return mapper.toFinalSchema(tableSchema)
     }
 }
@@ -7,17 +7,64 @@ package io.airbyte.cdk.load.schema
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.data.FieldType
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
 import io.airbyte.cdk.load.schema.model.TableName
 
 /** Transforms input schema elements to destination-specific naming and type conventions. */
 interface TableSchemaMapper {
+    /**
+     * Converts a stream descriptor to the final destination table name.
+     *
+     * @param desc The stream descriptor containing namespace and name information
+     * @return The mapped final table name in the destination system
+     */
     fun toFinalTableName(desc: DestinationStream.Descriptor): TableName
+
+    /**
+     * Generates a temporary table name based on the provided final table name. Temporary tables are
+     * typically used before data is moved to final tables to avoid data downtime.
+     *
+     * @param tableName The final table name to base the temporary name on
+     * @return The temporary table name
+     */
     fun toTempTableName(tableName: TableName): TableName
+
+    /**
+     * Transforms a column name from the input schema to comply with destination naming conventions.
+     * This may include handling special characters, case transformations, or length limitations.
+     *
+     * @param name The original column name from the input schema
+     * @return The destination-compatible column name
+     */
     fun toColumnName(name: String): String
+
+    /**
+     * Converts an Airbyte field type to the corresponding destination-specific column type. This
+     * handles mapping of data types from Airbyte's type system to the destination database's type
+     * system.
+     *
+     * @param fieldType The Airbyte field type to convert
+     * @return The destination-specific column type representation
+     */
     fun toColumnType(fieldType: FieldType): ColumnType
+
+    /**
+     * Performs any final transformations on the complete table schema before it's used in the
+     * destination. By default, returns the schema unchanged. Override to apply destination-specific
+     * schema modifications.
+     *
+     * @param tableSchema The complete stream table schema
+     * @return The finalized schema ready for use in the destination
+     */
+    fun toFinalSchema(tableSchema: StreamTableSchema) = tableSchema
+
+    /**
+     * Determines if two column names conflict according to destination-specific rules. By default,
+     * performs case-insensitive comparison. Override for different conflict detection logic.
+     *
+     * @param a First column name
+     * @param b Second column name
+     * @return true if the column names conflict, false otherwise
+     */
     fun colsConflict(a: String, b: String): Boolean = a.equals(b, ignoreCase = true)
 }
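The new `toFinalSchema` default makes the hook opt-in: a destination that needs last-minute schema munging overrides it, and everyone else inherits the identity behavior. A hedged sketch of that pattern follows; the `SchemaMapper`/`StreamTableSchema` types here are simplified stand-ins, and the lower-casing destination is hypothetical:

```kotlin
// Stand-in types; the real StreamTableSchema and TableSchemaMapper live in the load CDK.
data class StreamTableSchema(val columns: Map<String, String>)

interface SchemaMapper {
    // Mirrors the new default: pass the schema through unchanged unless overridden.
    fun toFinalSchema(tableSchema: StreamTableSchema): StreamTableSchema = tableSchema
}

// A hypothetical destination mapper that uses the escape hatch to force lower-case column names.
object LowercasingMapper : SchemaMapper {
    override fun toFinalSchema(tableSchema: StreamTableSchema) =
        tableSchema.copy(columns = tableSchema.columns.mapKeys { it.key.lowercase() })
}

fun main() {
    val schema = StreamTableSchema(mapOf("UserId" to "BIGINT", "Email" to "VARCHAR"))
    println(LowercasingMapper.toFinalSchema(schema)) // column keys are lower-cased
}
```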
@@ -4,4 +4,13 @@
 
 package io.airbyte.cdk.load.table
 
+/**
+ * CDC meta column names.
+ *
+ * Note: These CDC column names are brittle as they are separate yet coupled to the logic sources
+ * use to generate these column names. See
+ * [io.airbyte.integrations.source.mssql.MsSqlSourceOperations.MsSqlServerCdcMetaFields] for an
+ * example.
+ */
 const val CDC_DELETED_AT_COLUMN = "_ab_cdc_deleted_at"
+const val CDC_CURSOR_COLUMN = "_ab_cdc_cursor"
@@ -11,6 +11,7 @@ import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.data.FieldType
 import io.airbyte.cdk.load.data.IntegerType
 import io.airbyte.cdk.load.data.StringType
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
 import io.airbyte.cdk.load.schema.model.TableName
 import io.mockk.every
 import io.mockk.impl.annotations.MockK
@@ -42,6 +43,7 @@ class TableSchemaFactoryTest {
         every { mapper.toTempTableName(finalTableName) } returns tempTableName
         every { colNameResolver.getColumnNameMapping(inputSchema.keys) } returns columnNameMapping
         every { mapper.toColumnType(any()) } returns ColumnType("test_type", false)
+        every { mapper.toFinalSchema(any()) } answers { firstArg<StreamTableSchema>() }
 
         val result = factory.make(finalTableName, inputSchema, importType)
 
@@ -0,0 +1,859 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.cdk.load.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.data.AirbyteValue
|
||||||
|
import io.airbyte.cdk.load.data.ArrayValue
|
||||||
|
import io.airbyte.cdk.load.data.DateValue
|
||||||
|
import io.airbyte.cdk.load.data.IntegerValue
|
||||||
|
import io.airbyte.cdk.load.data.NullValue
|
||||||
|
import io.airbyte.cdk.load.data.NumberValue
|
||||||
|
import io.airbyte.cdk.load.data.ObjectValue
|
||||||
|
import io.airbyte.cdk.load.data.StringValue
|
||||||
|
import io.airbyte.cdk.load.data.TimeWithTimezoneValue
|
||||||
|
import io.airbyte.cdk.load.data.TimeWithoutTimezoneValue
|
||||||
|
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
|
||||||
|
import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
|
||||||
|
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
|
||||||
|
import io.airbyte.cdk.load.util.serializeToString
|
||||||
|
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
|
||||||
|
import java.math.BigDecimal
|
||||||
|
import java.math.BigInteger
|
||||||
|
import java.time.LocalDate
|
||||||
|
import java.time.LocalDateTime
|
||||||
|
import java.time.OffsetDateTime
|
||||||
|
import java.time.format.DateTimeFormatter
|
||||||
|
import java.time.format.DateTimeFormatterBuilder
|
||||||
|
import java.time.format.SignStyle
|
||||||
|
import java.time.temporal.ChronoField
|
||||||
|
import org.junit.jupiter.params.provider.Arguments
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file defines "interesting values" for all data types, along with expected behavior for those values.
|
||||||
|
* You're free to define your own values/behavior depending on the destination, but it's recommended
|
||||||
|
* that you try to match behavior to an existing fixture.
|
||||||
|
*
|
||||||
|
* Classes also include some convenience functions for JUnit. For example, you could annotate your
|
||||||
|
* method with:
|
||||||
|
* ```kotlin
|
||||||
|
* @ParameterizedTest
|
||||||
|
* @MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int64")
|
||||||
|
* ```
|
||||||
|
*
|
||||||
|
* By convention, all fixtures are declared as:
|
||||||
|
* 1. One or more `val <name>: List<Pair<AirbyteValue, Any?>>` (each pair representing the input value,
|
||||||
|
* and the expected output value)
|
||||||
|
* 2. One or more `fun <name>(): List<Arguments> = <name>.toArgs()`, which can be provided to JUnit's MethodSource
|
||||||
|
*
|
||||||
|
* If you need to mutate fixtures in some way, you should reference the `val`, and use the `toArgs()`
|
||||||
|
* extension function to convert it to JUnit's Arguments class. See [DataCoercionIntegerFixtures.int64AsBigInteger]
|
||||||
|
* for an example.
|
||||||
|
*/
|
||||||
|
|
||||||
|
object DataCoercionIntegerFixtures {
|
||||||
|
// "9".repeat(38)
|
||||||
|
val numeric38_0Max = bigint("99999999999999999999999999999999999999")
|
||||||
|
val numeric38_0Min = bigint("-99999999999999999999999999999999999999")
|
||||||
|
|
||||||
|
const val ZERO = "0"
|
||||||
|
const val ONE = "1"
|
||||||
|
const val NEGATIVE_ONE = "-1"
|
||||||
|
const val FORTY_TWO = "42"
|
||||||
|
const val NEGATIVE_FORTY_TWO = "-42"
|
||||||
|
const val INT32_MAX = "int32 max"
|
||||||
|
const val INT32_MIN = "int32 min"
|
||||||
|
const val INT32_MAX_PLUS_ONE = "int32_max + 1"
|
||||||
|
const val INT32_MIN_MINUS_ONE = "int32_min - 1"
|
||||||
|
const val INT64_MAX = "int64 max"
|
||||||
|
const val INT64_MIN = "int64 min"
|
||||||
|
const val INT64_MAX_PLUS_ONE = "int64_max + 1"
|
||||||
|
const val INT64_MIN_MINUS_1 = "int64_min - 1"
|
||||||
|
const val NUMERIC_38_0_MAX = "numeric(38,0) max"
|
||||||
|
const val NUMERIC_38_0_MIN = "numeric(38,0) min"
|
||||||
|
const val NUMERIC_38_0_MAX_PLUS_ONE = "numeric(38,0)_max + 1"
|
||||||
|
const val NUMERIC_38_0_MIN_MINUS_ONE = "numeric(38,0)_min - 1"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Many destinations use int64 to represent integers. In this case, we null out any value beyond
|
||||||
|
* Long.MIN/MAX_VALUE.
|
||||||
|
*/
|
||||||
|
val int64 =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(ZERO, IntegerValue(0), 0L),
|
||||||
|
case(ONE, IntegerValue(1), 1L),
|
||||||
|
case(NEGATIVE_ONE, IntegerValue(-1), -1L),
|
||||||
|
case(FORTY_TWO, IntegerValue(42), 42L),
|
||||||
|
case(NEGATIVE_FORTY_TWO, IntegerValue(-42), -42L),
|
||||||
|
// int32 bounds, and slightly out of bounds
|
||||||
|
case(INT32_MAX, IntegerValue(Integer.MAX_VALUE.toLong()), Integer.MAX_VALUE.toLong()),
|
||||||
|
case(INT32_MIN, IntegerValue(Integer.MIN_VALUE.toLong()), Integer.MIN_VALUE.toLong()),
|
||||||
|
case(
|
||||||
|
INT32_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(Integer.MAX_VALUE.toLong() + 1),
|
||||||
|
Integer.MAX_VALUE.toLong() + 1
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT32_MIN_MINUS_ONE,
|
||||||
|
IntegerValue(Integer.MIN_VALUE.toLong() - 1),
|
||||||
|
Integer.MIN_VALUE.toLong() - 1
|
||||||
|
),
|
||||||
|
// int64 bounds, and slightly out of bounds
|
||||||
|
case(INT64_MAX, IntegerValue(Long.MAX_VALUE), Long.MAX_VALUE),
|
||||||
|
case(INT64_MIN, IntegerValue(Long.MIN_VALUE), Long.MIN_VALUE),
|
||||||
|
// values out of int64 bounds are nulled
|
||||||
|
case(
|
||||||
|
INT64_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(bigint(Long.MAX_VALUE) + BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT64_MIN_MINUS_1,
|
||||||
|
IntegerValue(bigint(Long.MIN_VALUE) - BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
// NUMERIC(38, 9) bounds, and slightly out of bounds
|
||||||
|
// (these are all out of bounds for an int64 value, so they all get nulled)
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MAX,
|
||||||
|
IntegerValue(numeric38_0Max),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MIN,
|
||||||
|
IntegerValue(numeric38_0Min),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(numeric38_0Max + BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MIN_MINUS_ONE,
|
||||||
|
IntegerValue(numeric38_0Min - BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Many destination warehouses represent integers as a fixed-point type with 38 digits of
|
||||||
|
* precision. In this case, we only need to null out numbers larger than `1e38 - 1` / smaller
|
||||||
|
* than `-1e38 + 1`.
|
||||||
|
*/
|
||||||
|
val numeric38_0 =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(ZERO, IntegerValue(0), bigint(0L)),
|
||||||
|
case(ONE, IntegerValue(1), bigint(1L)),
|
||||||
|
case(NEGATIVE_ONE, IntegerValue(-1), bigint(-1L)),
|
||||||
|
case(FORTY_TWO, IntegerValue(42), bigint(42L)),
|
||||||
|
case(NEGATIVE_FORTY_TWO, IntegerValue(-42), bigint(-42L)),
|
||||||
|
// int32 bounds, and slightly out of bounds
|
||||||
|
case(
|
||||||
|
INT32_MAX,
|
||||||
|
IntegerValue(Integer.MAX_VALUE.toLong()),
|
||||||
|
bigint(Integer.MAX_VALUE.toLong())
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT32_MIN,
|
||||||
|
IntegerValue(Integer.MIN_VALUE.toLong()),
|
||||||
|
bigint(Integer.MIN_VALUE.toLong())
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT32_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(Integer.MAX_VALUE.toLong() + 1),
|
||||||
|
bigint(Integer.MAX_VALUE.toLong() + 1)
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT32_MIN_MINUS_ONE,
|
||||||
|
IntegerValue(Integer.MIN_VALUE.toLong() - 1),
|
||||||
|
bigint(Integer.MIN_VALUE.toLong() - 1)
|
||||||
|
),
|
||||||
|
// int64 bounds, and slightly out of bounds
|
||||||
|
case(INT64_MAX, IntegerValue(Long.MAX_VALUE), bigint(Long.MAX_VALUE)),
|
||||||
|
case(INT64_MIN, IntegerValue(Long.MIN_VALUE), bigint(Long.MIN_VALUE)),
|
||||||
|
case(
|
||||||
|
INT64_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(bigint(Long.MAX_VALUE) + BigInteger.ONE),
|
||||||
|
bigint(Long.MAX_VALUE) + BigInteger.ONE
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
INT64_MIN_MINUS_1,
|
||||||
|
IntegerValue(bigint(Long.MIN_VALUE) - BigInteger.ONE),
|
||||||
|
bigint(Long.MIN_VALUE) - BigInteger.ONE
|
||||||
|
),
|
||||||
|
// NUMERIC(38, 9) bounds, and slightly out of bounds
|
||||||
|
case(NUMERIC_38_0_MAX, IntegerValue(numeric38_0Max), numeric38_0Max),
|
||||||
|
case(NUMERIC_38_0_MIN, IntegerValue(numeric38_0Min), numeric38_0Min),
|
||||||
|
// These values exceed the 38-digit range, so they get nulled out
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MAX_PLUS_ONE,
|
||||||
|
IntegerValue(numeric38_0Max + BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_0_MIN_MINUS_ONE,
|
||||||
|
IntegerValue(numeric38_0Min - BigInteger.ONE),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun int64() = int64.toArgs()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience fixture if your [TestTableOperationsClient] returns integers as [BigInteger]
|
||||||
|
* rather than [Long].
|
||||||
|
*/
|
||||||
|
@JvmStatic
|
||||||
|
fun int64AsBigInteger() =
|
||||||
|
int64.map { it.copy(outputValue = it.outputValue?.let { bigint(it as Long) }) }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience fixture if your [TestTableOperationsClient] returns integers as [BigDecimal]
|
||||||
|
* rather than [Long].
|
||||||
|
*/
|
||||||
|
@JvmStatic
|
||||||
|
fun int64AsBigDecimal() =
|
||||||
|
int64.map { it.copy(outputValue = it.outputValue?.let { BigDecimal.valueOf(it as Long) }) }
|
||||||
|
|
||||||
|
@JvmStatic fun numeric38_0() = numeric38_0.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionNumberFixtures {
|
||||||
|
val numeric38_9Max = bigdec("99999999999999999999999999999.999999999")
|
||||||
|
val numeric38_9Min = bigdec("-99999999999999999999999999999.999999999")
|
||||||
|
|
||||||
|
const val ZERO = "0"
|
||||||
|
const val ONE = "1"
|
||||||
|
const val NEGATIVE_ONE = "-1"
|
||||||
|
const val ONE_HUNDRED_TWENTY_THREE_POINT_FOUR = "123.4"
|
||||||
|
const val NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR = "-123.4"
|
||||||
|
const val POSITIVE_HIGH_PRECISION_FLOAT = "positive high-precision float"
|
||||||
|
const val NEGATIVE_HIGH_PRECISION_FLOAT = "negative high-precision float"
|
||||||
|
const val NUMERIC_38_9_MAX = "numeric(38,9) max"
|
||||||
|
const val NUMERIC_38_9_MIN = "numeric(38,9) min"
|
||||||
|
const val SMALLEST_POSITIVE_FLOAT32 = "smallest positive float32"
|
||||||
|
const val SMALLEST_NEGATIVE_FLOAT32 = "smallest negative float32"
|
||||||
|
const val LARGEST_POSITIVE_FLOAT32 = "largest positive float32"
|
||||||
|
const val LARGEST_NEGATIVE_FLOAT32 = "largest negative float32"
|
||||||
|
const val SMALLEST_POSITIVE_FLOAT64 = "smallest positive float64"
|
||||||
|
const val SMALLEST_NEGATIVE_FLOAT64 = "smallest negative float64"
|
||||||
|
const val LARGEST_POSITIVE_FLOAT64 = "largest positive float64"
|
||||||
|
const val LARGEST_NEGATIVE_FLOAT64 = "largest negative float64"
|
||||||
|
const val SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64 = "slightly above largest positive float64"
|
||||||
|
const val SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64 = "slightly below largest negative float64"
|
||||||
|
|
||||||
|
val float64 =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(ZERO, NumberValue(bigdec(0)), 0.0),
|
||||||
|
case(ONE, NumberValue(bigdec(1)), 1.0),
|
||||||
|
case(NEGATIVE_ONE, NumberValue(bigdec(-1)), -1.0),
|
||||||
|
// This value isn't exactly representable as a float64
|
||||||
|
// (the exact value is `123.400000000000005684341886080801486968994140625`)
|
||||||
|
// but we should preserve the canonical representation
|
||||||
|
case(ONE_HUNDRED_TWENTY_THREE_POINT_FOUR, NumberValue(bigdec("123.4")), 123.4),
|
||||||
|
case(
|
||||||
|
NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
|
||||||
|
NumberValue(bigdec("-123.4")),
|
||||||
|
-123.4
|
||||||
|
),
|
||||||
|
// These values have too much precision for a float64, so we round them
|
||||||
|
case(
|
||||||
|
POSITIVE_HIGH_PRECISION_FLOAT,
|
||||||
|
NumberValue(bigdec("1234567890.1234567890123456789")),
|
||||||
|
1234567890.1234567,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NEGATIVE_HIGH_PRECISION_FLOAT,
|
||||||
|
NumberValue(bigdec("-1234567890.1234567890123456789")),
|
||||||
|
-1234567890.1234567,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_9_MAX,
|
||||||
|
NumberValue(numeric38_9Max),
|
||||||
|
1.0E29,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NUMERIC_38_9_MIN,
|
||||||
|
NumberValue(numeric38_9Min),
|
||||||
|
-1.0E29,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
// Float/Double MIN_VALUE and MAX_VALUE are all positive, so we manually test their negative
|
||||||
|
// counterparts
|
||||||
|
case(
|
||||||
|
SMALLEST_POSITIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(Float.MIN_VALUE.toDouble())),
|
||||||
|
Float.MIN_VALUE.toDouble()
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_NEGATIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(-Float.MIN_VALUE.toDouble())),
|
||||||
|
-Float.MIN_VALUE.toDouble()
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
LARGEST_POSITIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(Float.MAX_VALUE.toDouble())),
|
||||||
|
Float.MAX_VALUE.toDouble()
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
LARGEST_NEGATIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(-Float.MAX_VALUE.toDouble())),
|
||||||
|
-Float.MAX_VALUE.toDouble()
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_POSITIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(Double.MIN_VALUE)),
|
||||||
|
Double.MIN_VALUE
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MIN_VALUE)),
|
||||||
|
-Double.MIN_VALUE
|
||||||
|
),
|
||||||
|
case(LARGEST_POSITIVE_FLOAT64, NumberValue(bigdec(Double.MAX_VALUE)), Double.MAX_VALUE),
|
||||||
|
case(
|
||||||
|
LARGEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MAX_VALUE)),
|
||||||
|
-Double.MAX_VALUE
|
||||||
|
),
|
||||||
|
// These values are out of bounds, so we null them
|
||||||
|
case(
|
||||||
|
SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(Double.MAX_VALUE) + bigdec(Double.MIN_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MAX_VALUE) - bigdec(Double.MIN_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
val numeric38_9 =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(ZERO, NumberValue(bigdec(0)), bigdec(0.0)),
|
||||||
|
case(ONE, NumberValue(bigdec(1)), bigdec(1.0)),
|
||||||
|
case(NEGATIVE_ONE, NumberValue(bigdec(-1)), bigdec(-1.0)),
|
||||||
|
// This value isn't exactly representable as a float64
|
||||||
|
// (the exact value is `123.400000000000005684341886080801486968994140625`)
|
||||||
|
// but it's perfectly fine as a numeric(38, 9)
|
||||||
|
case(
|
||||||
|
ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
|
||||||
|
NumberValue(bigdec("123.4")),
|
||||||
|
bigdec("123.4")
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NEGATIVE_ONE_HUNDRED_TWENTY_THREE_POINT_FOUR,
|
||||||
|
NumberValue(bigdec("-123.4")),
|
||||||
|
bigdec("-123.4")
|
||||||
|
),
|
||||||
|
// These values have too much precision for a numeric(38, 9), so we round them
|
||||||
|
case(
|
||||||
|
POSITIVE_HIGH_PRECISION_FLOAT,
|
||||||
|
NumberValue(bigdec("1234567890.1234567890123456789")),
|
||||||
|
bigdec("1234567890.123456789"),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
NEGATIVE_HIGH_PRECISION_FLOAT,
|
||||||
|
NumberValue(bigdec("-1234567890.1234567890123456789")),
|
||||||
|
bigdec("-1234567890.123456789"),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_POSITIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(Float.MIN_VALUE.toDouble())),
|
||||||
|
bigdec(0),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_NEGATIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(-Float.MIN_VALUE.toDouble())),
|
||||||
|
bigdec(0),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_POSITIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(Double.MIN_VALUE)),
|
||||||
|
bigdec(0),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SMALLEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MIN_VALUE)),
|
||||||
|
bigdec(0),
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
// numeric bounds are perfectly fine
|
||||||
|
case(NUMERIC_38_9_MAX, NumberValue(numeric38_9Max), numeric38_9Max),
|
||||||
|
case(NUMERIC_38_9_MIN, NumberValue(numeric38_9Min), numeric38_9Min),
|
||||||
|
// These values are out of bounds, so we null them
|
||||||
|
case(
|
||||||
|
LARGEST_POSITIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(Float.MAX_VALUE.toDouble())),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
LARGEST_NEGATIVE_FLOAT32,
|
||||||
|
NumberValue(bigdec(-Float.MAX_VALUE.toDouble())),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
LARGEST_POSITIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(Double.MAX_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
LARGEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MAX_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SLIGHTLY_ABOVE_LARGEST_POSITIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(Double.MAX_VALUE) + bigdec(Double.MIN_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SLIGHTLY_BELOW_LARGEST_NEGATIVE_FLOAT64,
|
||||||
|
NumberValue(bigdec(-Double.MAX_VALUE) - bigdec(Double.MIN_VALUE)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
)
|
||||||
|
.map { it.copy(outputValue = (it.outputValue as BigDecimal?)?.setScale(9)) }
|
||||||
|
|
||||||
|
@JvmStatic fun float64() = float64.toArgs()
|
||||||
|
@JvmStatic fun numeric38_9() = numeric38_9.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
const val SIMPLE_TIMESTAMP = "simple timestamp"
|
||||||
|
const val UNIX_EPOCH = "unix epoch"
|
||||||
|
const val MINIMUM_TIMESTAMP = "minimum timestamp"
|
||||||
|
const val MAXIMUM_TIMESTAMP = "maximum timestamp"
|
||||||
|
const val OUT_OF_RANGE_TIMESTAMP = "out of range timestamp"
|
||||||
|
const val HIGH_PRECISION_TIMESTAMP = "high-precision timestamp"
|
||||||
|
|
||||||
|
object DataCoercionTimestampTzFixtures {
|
||||||
|
/**
|
||||||
|
* Many warehouses support timestamps between years 0001 - 9999.
|
||||||
|
*
|
||||||
|
* Depending on the exact warehouse, you may need to tweak the precision on some values. For
|
||||||
|
* example, Snowflake supports nanosecond-precision timestamps (9 decimal places), but BigQuery
|
||||||
|
* only supports microsecond precision (6 decimal places). BigQuery would probably do something
|
||||||
|
* like:
|
||||||
|
* ```kotlin
|
||||||
|
* DataCoercionTimestampTzFixtures.commonWarehouse
|
||||||
|
* .map {
|
||||||
|
* when (it.name) {
|
||||||
|
* "maximum AD timestamp" -> it.copy(
|
||||||
|
* inputValue = TimestampWithTimezoneValue("9999-12-31T23:59:59.999999Z"),
|
||||||
|
* outputValue = OffsetDateTime.parse("9999-12-31T23:59:59.999999Z"),
|
||||||
|
* changeReason = Reason.DESTINATION_FIELD_SIZE_LIMITATION,
|
||||||
|
* )
|
||||||
|
* "high-precision timestamp" -> it.copy(
|
||||||
|
* outputValue = OffsetDateTime.parse("2025-01-23T01:01:00.123456Z"),
|
||||||
|
* changeReason = Reason.DESTINATION_FIELD_SIZE_LIMITATION,
|
||||||
|
* )
|
||||||
|
* else -> it
* }
|
||||||
|
* }
|
||||||
|
* ```
|
||||||
|
*/
|
||||||
|
val commonWarehouse =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(
|
||||||
|
SIMPLE_TIMESTAMP,
|
||||||
|
TimestampWithTimezoneValue("2025-01-23T12:34:56.789Z"),
|
||||||
|
"2025-01-23T12:34:56.789Z",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
UNIX_EPOCH,
|
||||||
|
TimestampWithTimezoneValue("1970-01-01T00:00:00Z"),
|
||||||
|
"1970-01-01T00:00:00Z",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MINIMUM_TIMESTAMP,
|
||||||
|
TimestampWithTimezoneValue("0001-01-01T00:00:00Z"),
|
||||||
|
"0001-01-01T00:00:00Z",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MAXIMUM_TIMESTAMP,
|
||||||
|
TimestampWithTimezoneValue("9999-12-31T23:59:59.999999999Z"),
|
||||||
|
"9999-12-31T23:59:59.999999999Z",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
OUT_OF_RANGE_TIMESTAMP,
|
||||||
|
TimestampWithTimezoneValue(odt("10000-01-01T00:00Z")),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
HIGH_PRECISION_TIMESTAMP,
|
||||||
|
TimestampWithTimezoneValue("2025-01-23T01:01:00.123456789Z"),
|
||||||
|
"2025-01-23T01:01:00.123456789Z",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionTimestampNtzFixtures {
|
||||||
|
/** See [DataCoercionTimestampTzFixtures.commonWarehouse] for explanation */
|
||||||
|
val commonWarehouse =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(
|
||||||
|
SIMPLE_TIMESTAMP,
|
||||||
|
TimestampWithoutTimezoneValue("2025-01-23T12:34:56.789"),
|
||||||
|
"2025-01-23T12:34:56.789",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
UNIX_EPOCH,
|
||||||
|
TimestampWithoutTimezoneValue("1970-01-01T00:00:00"),
|
||||||
|
"1970-01-01T00:00:00",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MINIMUM_TIMESTAMP,
|
||||||
|
TimestampWithoutTimezoneValue("0001-01-01T00:00:00"),
|
||||||
|
"0001-01-01T00:00:00",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MAXIMUM_TIMESTAMP,
|
||||||
|
TimestampWithoutTimezoneValue("9999-12-31T23:59:59.999999999"),
|
||||||
|
"9999-12-31T23:59:59.999999999",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
OUT_OF_RANGE_TIMESTAMP,
|
||||||
|
TimestampWithoutTimezoneValue(ldt("10000-01-01T00:00")),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
HIGH_PRECISION_TIMESTAMP,
|
||||||
|
TimestampWithoutTimezoneValue("2025-01-23T01:01:00.123456789"),
|
||||||
|
"2025-01-23T01:01:00.123456789",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
const val MIDNIGHT = "midnight"
|
||||||
|
const val MAX_TIME = "max time"
|
||||||
|
const val HIGH_NOON = "high noon"
|
||||||
|
|
||||||
|
object DataCoercionTimeTzFixtures {
|
||||||
|
val timetz =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(MIDNIGHT, TimeWithTimezoneValue("00:00Z"), "00:00Z"),
|
||||||
|
case(MAX_TIME, TimeWithTimezoneValue("23:59:59.999999999Z"), "23:59:59.999999999Z"),
|
||||||
|
case(HIGH_NOON, TimeWithTimezoneValue("12:00Z"), "12:00Z"),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun timetz() = timetz.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionTimeNtzFixtures {
|
||||||
|
val timentz =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(MIDNIGHT, TimeWithoutTimezoneValue("00:00"), "00:00"),
|
||||||
|
case(MAX_TIME, TimeWithoutTimezoneValue("23:59:59.999999999"), "23:59:59.999999999"),
|
||||||
|
case(HIGH_NOON, TimeWithoutTimezoneValue("12:00"), "12:00"),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun timentz() = timentz.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionDateFixtures {
|
||||||
|
val commonWarehouse =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(
|
||||||
|
SIMPLE_TIMESTAMP,
|
||||||
|
DateValue("2025-01-23"),
|
||||||
|
"2025-01-23",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
UNIX_EPOCH,
|
||||||
|
DateValue("1970-01-01"),
|
||||||
|
"1970-01-01",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MINIMUM_TIMESTAMP,
|
||||||
|
DateValue("0001-01-01"),
|
||||||
|
"0001-01-01",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
MAXIMUM_TIMESTAMP,
|
||||||
|
DateValue("9999-12-31"),
|
||||||
|
"9999-12-31",
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
OUT_OF_RANGE_TIMESTAMP,
|
||||||
|
DateValue(date("10000-01-01")),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun commonWarehouse() = commonWarehouse.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionStringFixtures {
|
||||||
|
const val EMPTY_STRING = "empty string"
|
||||||
|
const val SHORT_STRING = "short string"
|
||||||
|
const val LONG_STRING = "long string"
|
||||||
|
const val SPECIAL_CHARS_STRING = "special chars string"
|
||||||
|
|
||||||
|
val strings =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(EMPTY_STRING, StringValue(""), ""),
|
||||||
|
case(SHORT_STRING, StringValue("foo"), "foo"),
|
||||||
|
// Implementers may override this to test their destination-specific limits.
|
||||||
|
// The default value is 16MB + 1 byte (slightly longer than Snowflake's 16MB varchar limit).
|
||||||
|
case(
|
||||||
|
LONG_STRING,
|
||||||
|
StringValue("a".repeat(16777216 + 1)),
|
||||||
|
null,
|
||||||
|
Reason.DESTINATION_FIELD_SIZE_LIMITATION
|
||||||
|
),
|
||||||
|
case(
|
||||||
|
SPECIAL_CHARS_STRING,
|
||||||
|
StringValue("`~!@#$%^&*()-=_+[]\\{}|o'O\",./<>?)Δ⅀↑∀"),
|
||||||
|
"`~!@#$%^&*()-=_+[]\\{}|o'O\",./<>?)Δ⅀↑∀"
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
@JvmStatic fun strings() = strings.toArgs()
|
||||||
|
}
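
/*
 * The long-string case above assumes a 16MB limit. A connector with a smaller limit -- say
 * 1MB -- could reuse [strings] and swap out just that case, following the same copy/map
 * pattern used by the other fixtures in this file. The 1MB figure below is purely
 * illustrative, not a recommendation.
 *
 * ```kotlin
 * val oneMegabyteStrings =
 *     DataCoercionStringFixtures.strings.map {
 *         if (it.name == DataCoercionStringFixtures.LONG_STRING) {
 *             it.copy(inputValue = StringValue("a".repeat(1_048_576 + 1)))
 *         } else {
 *             it
 *         }
 *     }
 * ```
 */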
|
||||||
|
|
||||||
|
object DataCoercionObjectFixtures {
|
||||||
|
const val EMPTY_OBJECT = "empty object"
|
||||||
|
const val NORMAL_OBJECT = "normal object"
|
||||||
|
|
||||||
|
val objects =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(EMPTY_OBJECT, ObjectValue(linkedMapOf()), emptyMap<String, Any?>()),
|
||||||
|
case(
|
||||||
|
NORMAL_OBJECT,
|
||||||
|
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
|
||||||
|
mapOf("foo" to "bar")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
val stringifiedObjects =
|
||||||
|
objects.map { fixture ->
|
||||||
|
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
@JvmStatic fun objects() = objects.toArgs()
|
||||||
|
|
||||||
|
@JvmStatic fun stringifiedObjects() = stringifiedObjects.toArgs()
|
||||||
|
}
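
/*
 * [stringifiedObjects] targets destinations that persist objects as JSON text rather than a
 * native object/variant type: same inputs, but the expected read-back value is the
 * serialized form. For example (the exact JSON formatting comes from the CDK's serializer,
 * so treat the literal below as illustrative rather than authoritative):
 *
 * ```kotlin
 * DataCoercionObjectFixtures.objects.last().outputValue            // mapOf("foo" to "bar")
 * DataCoercionObjectFixtures.stringifiedObjects.last().outputValue // """{"foo":"bar"}"""
 * ```
 */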
|
||||||
|
|
||||||
|
object DataCoercionArrayFixtures {
|
||||||
|
const val EMPTY_ARRAY = "empty array"
|
||||||
|
const val NORMAL_ARRAY = "normal array"
|
||||||
|
|
||||||
|
val arrays =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(EMPTY_ARRAY, ArrayValue(emptyList()), emptyList<Any?>()),
|
||||||
|
case(NORMAL_ARRAY, ArrayValue(listOf(StringValue("foo"))), listOf("foo")),
|
||||||
|
)
|
||||||
|
|
||||||
|
val stringifiedArrays =
|
||||||
|
arrays.map { fixture ->
|
||||||
|
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
@JvmStatic fun arrays() = arrays.toArgs()
|
||||||
|
|
||||||
|
@JvmStatic fun stringifiedArrays() = stringifiedArrays.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
const val UNION_INT_VALUE = "int value"
|
||||||
|
const val UNION_OBJ_VALUE = "object value"
|
||||||
|
const val UNION_STR_VALUE = "string value"
|
||||||
|
|
||||||
|
object DataCoercionUnionFixtures {
|
||||||
|
val unions =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(UNION_INT_VALUE, IntegerValue(42), 42L),
|
||||||
|
case(UNION_STR_VALUE, StringValue("foo"), "foo"),
|
||||||
|
case(
|
||||||
|
UNION_OBJ_VALUE,
|
||||||
|
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
|
||||||
|
mapOf("foo" to "bar")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
val stringifiedUnions =
|
||||||
|
unions.map { fixture ->
|
||||||
|
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
@JvmStatic fun unions() = unions.toArgs()
|
||||||
|
|
||||||
|
@JvmStatic fun stringifiedUnions() = stringifiedUnions.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
object DataCoercionLegacyUnionFixtures {
|
||||||
|
val unions =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
// Legacy union of int x object will select object, and you can't write an int to an
|
||||||
|
// object column.
|
||||||
|
// So we should null it out.
|
||||||
|
case(UNION_INT_VALUE, IntegerValue(42), null, Reason.DESTINATION_TYPECAST_ERROR),
|
||||||
|
// Similarly, we should null out strings.
|
||||||
|
case(UNION_STR_VALUE, StringValue("foo"), null, Reason.DESTINATION_TYPECAST_ERROR),
|
||||||
|
// But objects can be written as objects, so retain this value.
|
||||||
|
case(
|
||||||
|
UNION_OBJ_VALUE,
|
||||||
|
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
|
||||||
|
mapOf("foo" to "bar")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
val stringifiedUnions =
|
||||||
|
DataCoercionUnionFixtures.unions.map { fixture ->
|
||||||
|
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
@JvmStatic fun unions() = unions.toArgs()
|
||||||
|
|
||||||
|
@JvmStatic fun stringifiedUnions() = DataCoercionUnionFixtures.stringifiedUnions.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is pretty much identical to DataCoercionUnionFixtures, but kept separate in case we need to add
|
||||||
|
// different test cases for either of them.
|
||||||
|
object DataCoercionUnknownFixtures {
|
||||||
|
const val INT_VALUE = "integer value"
|
||||||
|
const val STR_VALUE = "string value"
|
||||||
|
const val OBJ_VALUE = "object value"
|
||||||
|
|
||||||
|
val unknowns =
|
||||||
|
listOf(
|
||||||
|
case(NULL, NullValue, null),
|
||||||
|
case(INT_VALUE, IntegerValue(42), 42L),
|
||||||
|
case(STR_VALUE, StringValue("foo"), "foo"),
|
||||||
|
case(
|
||||||
|
OBJ_VALUE,
|
||||||
|
ObjectValue(linkedMapOf("foo" to StringValue("bar"))),
|
||||||
|
mapOf("foo" to "bar")
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
val stringifiedUnknowns =
|
||||||
|
unknowns.map { fixture ->
|
||||||
|
fixture.copy(outputValue = fixture.outputValue?.serializeToString())
|
||||||
|
}
|
||||||
|
|
||||||
|
@JvmStatic fun unknowns() = unknowns.toArgs()
|
||||||
|
|
||||||
|
@JvmStatic fun stringifiedUnknowns() = stringifiedUnknowns.toArgs()
|
||||||
|
}
|
||||||
|
|
||||||
|
fun List<DataCoercionTestCase>.toArgs(): List<Arguments> =
|
||||||
|
this.map { Arguments.argumentSet(it.name, it.inputValue, it.outputValue, it.changeReason) }
|
||||||
|
.toList()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method to use the BigDecimal constructor (supports exponential notation like `1e38`) to
|
||||||
|
* construct a BigInteger.
|
||||||
|
*/
|
||||||
|
fun bigint(str: String): BigInteger = BigDecimal(str).toBigIntegerExact()
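
// For example, the 38-digit integer bound (1e38 - 1) can be written compactly as
// `bigint("1e38") - BigInteger.ONE`, whereas `BigInteger("1e38")` would throw a
// NumberFormatException because BigInteger's constructor doesn't accept exponent notation.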
|
||||||
|
|
||||||
|
/** Shorthand utility method to construct a bigint from a long */
|
||||||
|
fun bigint(long: Long): BigInteger = BigInteger.valueOf(long)
|
||||||
|
|
||||||
|
fun bigdec(str: String): BigDecimal = BigDecimal(str)
|
||||||
|
|
||||||
|
fun bigdec(double: Double): BigDecimal = BigDecimal.valueOf(double)
|
||||||
|
|
||||||
|
fun bigdec(int: Int): BigDecimal = BigDecimal.valueOf(int.toDouble())
|
||||||
|
|
||||||
|
fun odt(str: String): OffsetDateTime = OffsetDateTime.parse(str, dateTimeFormatter)
|
||||||
|
|
||||||
|
fun ldt(str: String): LocalDateTime = LocalDateTime.parse(str, dateTimeFormatter)
|
||||||
|
|
||||||
|
fun date(str: String): LocalDate = LocalDate.parse(str, dateFormatter)
|
||||||
|
|
||||||
|
// The default java.time.*.parse() behavior only accepts up to 4-digit years.
|
||||||
|
// Build a custom formatter to handle larger years.
|
||||||
|
val dateFormatter =
|
||||||
|
DateTimeFormatterBuilder()
|
||||||
|
// java.time.* supports up to 9-digit years
|
||||||
|
.appendValue(ChronoField.YEAR, 1, 9, SignStyle.NORMAL)
|
||||||
|
.appendLiteral('-')
|
||||||
|
.appendValue(ChronoField.MONTH_OF_YEAR)
|
||||||
|
.appendLiteral('-')
|
||||||
|
.appendValue(ChronoField.DAY_OF_MONTH)
|
||||||
|
.toFormatter()
|
||||||
|
|
||||||
|
val dateTimeFormatter =
|
||||||
|
DateTimeFormatterBuilder()
|
||||||
|
.append(dateFormatter)
|
||||||
|
.appendLiteral('T')
|
||||||
|
// Accepts strings with/without an offset, so we can use this formatter
|
||||||
|
// for both timestamp with and without timezone
|
||||||
|
.append(DateTimeFormatter.ISO_TIME)
|
||||||
|
.toFormatter()
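
/*
 * A quick illustration of why the custom formatters above are needed: the out-of-range
 * fixtures use a five-digit year, which java.time's default ISO parsers only accept with a
 * leading '+' sign.
 *
 * ```kotlin
 * odt("10000-01-01T00:00Z")                   // parses via dateTimeFormatter
 * OffsetDateTime.parse("10000-01-01T00:00Z")  // throws DateTimeParseException
 * OffsetDateTime.parse("+10000-01-01T00:00Z") // the default parser requires the '+' prefix
 * ```
 */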
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a single data coercion test case. You probably want to use [case] as a shorthand
|
||||||
|
* constructor.
|
||||||
|
*
|
||||||
|
* @param name A short human-readable name for the test. Primarily useful for tests where
|
||||||
|
* [inputValue] is either very long, or otherwise hard to read.
|
||||||
|
* @param inputValue The value to pass into [ValueCoercer.validate]
|
||||||
|
* @param outputValue The value that we expect to read back from the destination. Should be
|
||||||
|
* basically equivalent to the output of [ValueCoercer.validate]
|
||||||
|
* @param changeReason If `validate` returns Truncate/Nullify, the reason for that
|
||||||
|
* truncation/nullification. If `validate` returns Valid, this should be null.
|
||||||
|
*/
|
||||||
|
data class DataCoercionTestCase(
|
||||||
|
val name: String,
|
||||||
|
val inputValue: AirbyteValue,
|
||||||
|
val outputValue: Any?,
|
||||||
|
val changeReason: Reason? = null,
|
||||||
|
)
|
||||||
|
|
||||||
|
fun case(
|
||||||
|
name: String,
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
outputValue: Any?,
|
||||||
|
changeReason: Reason? = null,
|
||||||
|
) = DataCoercionTestCase(name, inputValue, outputValue, changeReason)
|
||||||
|
|
||||||
|
const val NULL = "null"
|
||||||
@@ -0,0 +1,369 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.cdk.load.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.data.AirbyteValue
|
||||||
|
import io.airbyte.cdk.load.data.ArrayType
|
||||||
|
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
|
||||||
|
import io.airbyte.cdk.load.data.BooleanType
|
||||||
|
import io.airbyte.cdk.load.data.BooleanValue
|
||||||
|
import io.airbyte.cdk.load.data.DateType
|
||||||
|
import io.airbyte.cdk.load.data.FieldType
|
||||||
|
import io.airbyte.cdk.load.data.IntegerType
|
||||||
|
import io.airbyte.cdk.load.data.NumberType
|
||||||
|
import io.airbyte.cdk.load.data.ObjectType
|
||||||
|
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
|
||||||
|
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
|
||||||
|
import io.airbyte.cdk.load.data.StringType
|
||||||
|
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
|
||||||
|
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
|
||||||
|
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
|
||||||
|
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
|
||||||
|
import io.airbyte.cdk.load.data.UnionType
|
||||||
|
import io.airbyte.cdk.load.data.UnknownType
|
||||||
|
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
|
||||||
|
import io.airbyte.cdk.load.message.Meta
|
||||||
|
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||||
|
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||||
|
import io.airbyte.cdk.load.util.Jsons
|
||||||
|
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
|
||||||
|
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||||
|
import kotlinx.coroutines.test.runTest
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The tests in this class are designed to reference the parameters defined in
|
||||||
|
* `DataCoercionFixtures.kt`. For example, you might annotate [`handle integer values`] with
|
||||||
|
* `@MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int32")`. See each
|
||||||
|
* fixture class for explanations of what behavior they are exercising.
|
||||||
|
*
|
||||||
|
* Note that this class _only_ exercises [ValueCoercer.validate]. You should write separate unit
|
||||||
|
* tests for [ValueCoercer.map]. For now, the `map` function is primarily intended for transforming
|
||||||
|
* `UnionType` fields into other types (typically `StringType`), at which point your `validate`
|
||||||
|
* implementation should be able to handle any StringValue (regardless of whether it was originally
|
||||||
|
* a StringType or UnionType).
|
||||||
|
*/
|
||||||
|
@MicronautTest(environments = ["component"], resolveParameters = false)
|
||||||
|
interface DataCoercionSuite {
|
||||||
|
val coercer: ValueCoercer
|
||||||
|
val airbyteMetaColumnMapping: Map<String, String>
|
||||||
|
get() = Meta.COLUMN_NAMES.associateWith { it }
|
||||||
|
val columnNameMapping: ColumnNameMapping
|
||||||
|
get() = ColumnNameMapping(mapOf("test" to "test"))
|
||||||
|
|
||||||
|
val opsClient: TableOperationsClient
|
||||||
|
val testClient: TestTableOperationsClient
|
||||||
|
val schemaFactory: TableSchemaFactory
|
||||||
|
|
||||||
|
val harness: TableOperationsTestHarness
|
||||||
|
get() =
|
||||||
|
TableOperationsTestHarness(
|
||||||
|
opsClient,
|
||||||
|
testClient,
|
||||||
|
schemaFactory,
|
||||||
|
airbyteMetaColumnMapping
|
||||||
|
)
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionIntegerFixtures]. */
|
||||||
|
fun `handle integer values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(IntegerType, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionNumberFixtures]. */
|
||||||
|
fun `handle number values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(NumberType, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionTimestampTzFixtures]. */
|
||||||
|
fun `handle timestamptz values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(TimestampTypeWithTimezone, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionTimestampNtzFixtures]. */
|
||||||
|
fun `handle timestampntz values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(TimestampTypeWithoutTimezone, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionTimeTzFixtures]. */
|
||||||
|
fun `handle timetz values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(TimeTypeWithTimezone, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionTimeNtzFixtures]. */
|
||||||
|
fun `handle timentz values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(TimeTypeWithoutTimezone, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionDateFixtures]. */
|
||||||
|
fun `handle date values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(DateType, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** No fixtures, hardcoded to just write `true` */
|
||||||
|
fun `handle bool values`(expectedValue: Any?) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(BooleanType, nullable = true),
|
||||||
|
// Just test on `true` and assume `false` also works
|
||||||
|
BooleanValue(true),
|
||||||
|
expectedValue,
|
||||||
|
// If your destination is nulling/truncating booleans... that's almost definitely a bug
|
||||||
|
expectedChangeReason = null,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionStringFixtures]. */
|
||||||
|
fun `handle string values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(StringType, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
|
||||||
|
fun `handle object values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(
|
||||||
|
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
|
||||||
|
nullable = true
|
||||||
|
),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
|
||||||
|
fun `handle empty object values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(ObjectTypeWithEmptySchema, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionObjectFixtures]. */
|
||||||
|
fun `handle schemaless object values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(ObjectTypeWithoutSchema, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionArrayFixtures]. */
|
||||||
|
fun `handle array values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(ArrayType(FieldType(StringType, true)), nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Fixtures are defined in [DataCoercionArrayFixtures]. */
|
||||||
|
fun `handle schemaless array values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(ArrayTypeWithoutSchema, nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All destinations should implement this, even if your destination supports legacy unions.
|
||||||
|
*
|
||||||
|
* Fixtures are defined in [DataCoercionUnionFixtures].
|
||||||
|
*/
|
||||||
|
fun `handle union values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(
|
||||||
|
UnionType(
|
||||||
|
setOf(
|
||||||
|
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
|
||||||
|
IntegerType,
|
||||||
|
StringType,
|
||||||
|
),
|
||||||
|
isLegacyUnion = false
|
||||||
|
),
|
||||||
|
nullable = true
|
||||||
|
),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only destinations that maintain "legacy" union behavior should implement this
|
||||||
|
* test. If you're not sure, check whether your `application-connector.yaml` includes an
|
||||||
|
* `airbyte.destination.core.types.unions: LEGACY` property.
|
||||||
|
*
|
||||||
|
* Fixtures are defined in [DataCoercionLegacyUnionFixtures].
|
||||||
|
*/
|
||||||
|
fun `handle legacy union values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(
|
||||||
|
UnionType(
|
||||||
|
setOf(
|
||||||
|
ObjectType(linkedMapOf("foo" to FieldType(StringType, true))),
|
||||||
|
IntegerType,
|
||||||
|
StringType,
|
||||||
|
),
|
||||||
|
isLegacyUnion = true
|
||||||
|
),
|
||||||
|
nullable = true
|
||||||
|
),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fun `handle unknown values`(
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?
|
||||||
|
) = runTest {
|
||||||
|
harness.testValueCoercion(
|
||||||
|
coercer,
|
||||||
|
columnNameMapping,
|
||||||
|
FieldType(UnknownType(Jsons.readTree(("""{"type": "potato"}"""))), nullable = true),
|
||||||
|
inputValue,
|
||||||
|
expectedValue,
|
||||||
|
expectedChangeReason,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
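
/*
 * A rough sketch (class name, injection style, and fixture choices are illustrative; the
 * exact Micronaut wiring depends on the connector) of what a concrete implementation of this
 * suite might look like, combining the abstract members with @MethodSource-annotated
 * overrides as described in the class KDoc above:
 *
 * ```kotlin
 * @MicronautTest(environments = ["component"], resolveParameters = false)
 * class MyDestinationDataCoercionTest(
 *     override val coercer: ValueCoercer,
 *     override val opsClient: TableOperationsClient,
 *     override val testClient: TestTableOperationsClient,
 *     override val schemaFactory: TableSchemaFactory,
 * ) : DataCoercionSuite {
 *     @ParameterizedTest
 *     @MethodSource("io.airbyte.cdk.load.component.DataCoercionNumberFixtures#float64")
 *     override fun `handle number values`(
 *         inputValue: AirbyteValue,
 *         expectedValue: Any?,
 *         expectedChangeReason: Reason?
 *     ) = super<DataCoercionSuite>.`handle number values`(inputValue, expectedValue, expectedChangeReason)
 *
 *     @ParameterizedTest
 *     @MethodSource("io.airbyte.cdk.load.component.DataCoercionUnionFixtures#stringifiedUnions")
 *     override fun `handle union values`(
 *         inputValue: AirbyteValue,
 *         expectedValue: Any?,
 *         expectedChangeReason: Reason?
 *     ) = super<DataCoercionSuite>.`handle union values`(inputValue, expectedValue, expectedChangeReason)
 * }
 * ```
 */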
|
||||||
@@ -4,10 +4,7 @@
|
|||||||
|
|
||||||
package io.airbyte.cdk.load.component
|
package io.airbyte.cdk.load.component
|
||||||
|
|
||||||
import io.airbyte.cdk.load.command.Append
|
|
||||||
import io.airbyte.cdk.load.command.Dedupe
|
|
||||||
import io.airbyte.cdk.load.command.DestinationStream
|
import io.airbyte.cdk.load.command.DestinationStream
|
||||||
import io.airbyte.cdk.load.command.ImportType
|
|
||||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||||
import io.airbyte.cdk.load.data.AirbyteValue
|
import io.airbyte.cdk.load.data.AirbyteValue
|
||||||
import io.airbyte.cdk.load.data.ArrayType
|
import io.airbyte.cdk.load.data.ArrayType
|
||||||
@@ -26,15 +23,14 @@ import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
|
|||||||
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
|
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
|
||||||
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
|
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
|
||||||
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
|
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
|
||||||
|
import io.airbyte.cdk.load.data.UnionType
|
||||||
import io.airbyte.cdk.load.data.UnknownType
|
import io.airbyte.cdk.load.data.UnknownType
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
|
||||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||||
import io.airbyte.cdk.load.schema.model.TableName
|
import io.airbyte.cdk.load.schema.model.TableName
|
||||||
import io.airbyte.cdk.load.schema.model.TableNames
|
|
||||||
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
|
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
|
||||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||||
import io.airbyte.cdk.load.util.Jsons
|
import io.airbyte.cdk.load.util.Jsons
|
||||||
@@ -89,6 +85,18 @@ object TableOperationsFixtures {
|
|||||||
"array" to FieldType(ArrayType(FieldType(StringType, true)), true),
|
"array" to FieldType(ArrayType(FieldType(StringType, true)), true),
|
||||||
"object" to
|
"object" to
|
||||||
FieldType(ObjectType(linkedMapOf("key" to FieldType(StringType, true))), true),
|
FieldType(ObjectType(linkedMapOf("key" to FieldType(StringType, true))), true),
|
||||||
|
"union" to
|
||||||
|
FieldType(
|
||||||
|
UnionType(setOf(StringType, IntegerType), isLegacyUnion = false),
|
||||||
|
true
|
||||||
|
),
|
||||||
|
// Most destinations just ignore the isLegacyUnion flag, which is totally fine.
|
||||||
|
// This is here for the small set of connectors that respect it.
|
||||||
|
"legacy_union" to
|
||||||
|
FieldType(
|
||||||
|
UnionType(setOf(StringType, IntegerType), isLegacyUnion = true),
|
||||||
|
true
|
||||||
|
),
|
||||||
"unknown" to FieldType(UnknownType(Jsons.readTree("""{"type": "potato"}""")), true),
|
"unknown" to FieldType(UnknownType(Jsons.readTree("""{"type": "potato"}""")), true),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
@@ -106,6 +114,8 @@ object TableOperationsFixtures {
|
|||||||
"time_ntz" to "time_ntz",
|
"time_ntz" to "time_ntz",
|
||||||
"array" to "array",
|
"array" to "array",
|
||||||
"object" to "object",
|
"object" to "object",
|
||||||
|
"union" to "union",
|
||||||
|
"legacy_union" to "legacy_union",
|
||||||
"unknown" to "unknown",
|
"unknown" to "unknown",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -678,105 +688,24 @@ object TableOperationsFixtures {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create common destination stream configurations
|
// Create common destination stream configurations
|
||||||
fun createAppendStream(
|
|
||||||
namespace: String,
|
|
||||||
name: String,
|
|
||||||
schema: ObjectType,
|
|
||||||
generationId: Long = 1,
|
|
||||||
minimumGenerationId: Long = 0,
|
|
||||||
syncId: Long = 1,
|
|
||||||
): DestinationStream =
|
|
||||||
DestinationStream(
|
|
||||||
unmappedNamespace = namespace,
|
|
||||||
unmappedName = name,
|
|
||||||
importType = Append,
|
|
||||||
generationId = generationId,
|
|
||||||
minimumGenerationId = minimumGenerationId,
|
|
||||||
syncId = syncId,
|
|
||||||
schema = schema,
|
|
||||||
namespaceMapper = NamespaceMapper(),
|
|
||||||
tableSchema =
|
|
||||||
StreamTableSchema(
|
|
||||||
tableNames = TableNames(finalTableName = TableName(namespace, name)),
|
|
||||||
columnSchema =
|
|
||||||
ColumnSchema(
|
|
||||||
inputSchema = schema.properties,
|
|
||||||
inputToFinalColumnNames = schema.properties.keys.associateWith { it },
|
|
||||||
finalSchema = mapOf(),
|
|
||||||
),
|
|
||||||
importType = Append,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
fun createDedupeStream(
|
|
||||||
namespace: String,
|
|
||||||
name: String,
|
|
||||||
schema: ObjectType,
|
|
||||||
primaryKey: List<List<String>>,
|
|
||||||
cursor: List<String>,
|
|
||||||
generationId: Long = 1,
|
|
||||||
minimumGenerationId: Long = 0,
|
|
||||||
syncId: Long = 1,
|
|
||||||
): DestinationStream =
|
|
||||||
DestinationStream(
|
|
||||||
unmappedNamespace = namespace,
|
|
||||||
unmappedName = name,
|
|
||||||
importType =
|
|
||||||
Dedupe(
|
|
||||||
primaryKey = primaryKey,
|
|
||||||
cursor = cursor,
|
|
||||||
),
|
|
||||||
generationId = generationId,
|
|
||||||
minimumGenerationId = minimumGenerationId,
|
|
||||||
syncId = syncId,
|
|
||||||
schema = schema,
|
|
||||||
namespaceMapper = NamespaceMapper(),
|
|
||||||
tableSchema =
|
|
||||||
StreamTableSchema(
|
|
||||||
tableNames = TableNames(finalTableName = TableName(namespace, name)),
|
|
||||||
columnSchema =
|
|
||||||
ColumnSchema(
|
|
||||||
inputSchema = schema.properties,
|
|
||||||
inputToFinalColumnNames = schema.properties.keys.associateWith { it },
|
|
||||||
finalSchema = mapOf(),
|
|
||||||
),
|
|
||||||
importType =
|
|
||||||
Dedupe(
|
|
||||||
primaryKey = primaryKey,
|
|
||||||
cursor = cursor,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
fun createStream(
|
fun createStream(
|
||||||
namespace: String,
|
namespace: String,
|
||||||
name: String,
|
name: String,
|
||||||
schema: ObjectType,
|
tableSchema: StreamTableSchema,
|
||||||
importType: ImportType,
|
|
||||||
generationId: Long = 1,
|
generationId: Long = 1,
|
||||||
minimumGenerationId: Long = 0,
|
minimumGenerationId: Long = 0,
|
||||||
syncId: Long = 1,
|
syncId: Long = 1,
|
||||||
) =
|
): DestinationStream =
|
||||||
DestinationStream(
|
DestinationStream(
|
||||||
unmappedNamespace = namespace,
|
unmappedNamespace = namespace,
|
||||||
unmappedName = name,
|
unmappedName = name,
|
||||||
importType = importType,
|
importType = tableSchema.importType,
|
||||||
generationId = generationId,
|
generationId = generationId,
|
||||||
minimumGenerationId = minimumGenerationId,
|
minimumGenerationId = minimumGenerationId,
|
||||||
syncId = syncId,
|
syncId = syncId,
|
||||||
schema = schema,
|
schema = ObjectType(LinkedHashMap(tableSchema.columnSchema.inputSchema)),
|
||||||
namespaceMapper = NamespaceMapper(),
|
namespaceMapper = NamespaceMapper(),
|
||||||
tableSchema =
|
tableSchema = tableSchema,
|
||||||
StreamTableSchema(
|
|
||||||
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
|
|
||||||
columnSchema =
|
|
||||||
ColumnSchema(
|
|
||||||
inputSchema = schema.properties,
|
|
||||||
inputToFinalColumnNames = mapOf(),
|
|
||||||
finalSchema = mapOf(),
|
|
||||||
),
|
|
||||||
importType = importType,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
fun <V> List<Map<String, V>>.sortBy(key: String) =
|
fun <V> List<Map<String, V>>.sortBy(key: String) =
|
||||||
@@ -800,6 +729,11 @@ object TableOperationsFixtures {
|
|||||||
return map { record -> record.mapKeys { (k, _) -> totalMapping.invert()[k] ?: k } }
|
return map { record -> record.mapKeys { (k, _) -> totalMapping.invert()[k] ?: k } }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun <V> List<Map<String, V>>.removeAirbyteColumns(
|
||||||
|
airbyteMetaColumnMapping: Map<String, String>
|
||||||
|
): List<Map<String, V>> =
|
||||||
|
this.map { rec -> rec.filter { !airbyteMetaColumnMapping.containsValue(it.key) } }
|
||||||
|
|
||||||
fun <V> List<Map<String, V>>.removeNulls() =
|
fun <V> List<Map<String, V>>.removeNulls() =
|
||||||
this.map { record -> record.filterValues { it != null } }
|
this.map { record -> record.filterValues { it != null } }
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
package io.airbyte.cdk.load.component
|
package io.airbyte.cdk.load.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.command.Append
|
||||||
|
import io.airbyte.cdk.load.command.Dedupe
|
||||||
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
|
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
|
||||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
|
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
|
||||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
|
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
|
||||||
@@ -18,6 +20,7 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
|||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||||
|
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||||
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||||
import kotlinx.coroutines.test.runTest
|
import kotlinx.coroutines.test.runTest
|
||||||
@@ -48,12 +51,15 @@ interface TableOperationsSuite {
|
|||||||
/** The database client instance to test. Must be properly configured and connected. */
|
/** The database client instance to test. Must be properly configured and connected. */
|
||||||
val client: TableOperationsClient
|
val client: TableOperationsClient
|
||||||
val testClient: TestTableOperationsClient
|
val testClient: TestTableOperationsClient
|
||||||
|
val schemaFactory: TableSchemaFactory
|
||||||
|
|
||||||
// since ColumnNameMapping doesn't include the airbyte columns...
|
// since ColumnNameMapping doesn't include the airbyte columns...
|
||||||
val airbyteMetaColumnMapping: Map<String, String>
|
val airbyteMetaColumnMapping: Map<String, String>
|
||||||
get() = Meta.COLUMN_NAMES.associateWith { it }
|
get() = Meta.COLUMN_NAMES.associateWith { it }
|
||||||
|
|
||||||
private val harness: TableOperationsTestHarness
|
private val harness: TableOperationsTestHarness
|
||||||
get() = TableOperationsTestHarness(client, testClient, airbyteMetaColumnMapping)
|
get() =
|
||||||
|
TableOperationsTestHarness(client, testClient, schemaFactory, airbyteMetaColumnMapping)
|
||||||
|
|
||||||
/** Tests basic database connectivity by pinging the database. */
|
/** Tests basic database connectivity by pinging the database. */
|
||||||
fun `connect to database`() = runTest { assertDoesNotThrow { testClient.ping() } }
|
fun `connect to database`() = runTest { assertDoesNotThrow { testClient.ping() } }
|
||||||
@@ -84,16 +90,19 @@ interface TableOperationsSuite {
|
|||||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||||
harness.assertTableDoesNotExist(testTable)
|
harness.assertTableDoesNotExist(testTable)
|
||||||
|
|
||||||
|
val tableSchema =
|
||||||
|
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
client.createTable(
|
client.createTable(
|
||||||
tableName = testTable,
|
tableName = testTable,
|
||||||
columnNameMapping = Fixtures.TEST_MAPPING,
|
columnNameMapping = Fixtures.TEST_MAPPING,
|
||||||
stream =
|
stream =
|
||||||
Fixtures.createAppendStream(
|
Fixtures.createStream(
|
||||||
namespace = testTable.namespace,
|
namespace = testTable.namespace,
|
||||||
name = testTable.name,
|
name = testTable.name,
|
||||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
tableSchema = tableSchema,
|
||||||
),
|
),
|
||||||
replace = false,
|
replace = false,
|
||||||
)
|
)
|
||||||
@@ -129,11 +138,20 @@ interface TableOperationsSuite {
|
|||||||
val testTable = Fixtures.generateTestTableName("insert-test-table", testNamespace)
|
val testTable = Fixtures.generateTestTableName("insert-test-table", testNamespace)
|
||||||
harness.assertTableDoesNotExist(testTable)
|
harness.assertTableDoesNotExist(testTable)
|
||||||
|
|
||||||
|
val tableSchema =
|
||||||
|
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val stream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = testTable.namespace,
|
||||||
|
name = testTable.name,
|
||||||
|
tableSchema = tableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
tableName = testTable,
|
tableName = testTable,
|
||||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
|
||||||
columnNameMapping = columnNameMapping,
|
columnNameMapping = columnNameMapping,
|
||||||
|
stream = stream,
|
||||||
)
|
)
|
||||||
|
|
||||||
testClient.insertRecords(testTable, inputRecords, columnNameMapping)
|
testClient.insertRecords(testTable, inputRecords, columnNameMapping)
|
||||||
@@ -142,7 +160,7 @@ interface TableOperationsSuite {
|
|||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
expectedRecords,
|
expectedRecords,
|
||||||
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping),
|
||||||
)
|
)
|
||||||
} finally {
|
} finally {
|
||||||
harness.cleanupTable(testTable)
|
harness.cleanupTable(testTable)
|
||||||
@@ -174,11 +192,20 @@ interface TableOperationsSuite {
|
|||||||
val testTable = Fixtures.generateTestTableName("count-test-table", testNamespace)
|
val testTable = Fixtures.generateTestTableName("count-test-table", testNamespace)
|
||||||
harness.assertTableDoesNotExist(testTable)
|
harness.assertTableDoesNotExist(testTable)
|
||||||
|
|
||||||
|
val tableSchema =
|
||||||
|
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val stream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = testTable.namespace,
|
||||||
|
name = testTable.name,
|
||||||
|
tableSchema = tableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
tableName = testTable,
|
tableName = testTable,
|
||||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
|
||||||
columnNameMapping = columnNameMapping,
|
columnNameMapping = columnNameMapping,
|
||||||
|
stream = stream,
|
||||||
)
|
)
|
||||||
|
|
||||||
val records1 =
|
val records1 =
|
||||||
@@ -322,11 +349,20 @@ interface TableOperationsSuite {
|
|||||||
val testTable = Fixtures.generateTestTableName("gen-id-test-table", testNamespace)
|
val testTable = Fixtures.generateTestTableName("gen-id-test-table", testNamespace)
|
||||||
harness.assertTableDoesNotExist(testTable)
|
harness.assertTableDoesNotExist(testTable)
|
||||||
|
|
||||||
|
val tableSchema =
|
||||||
|
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val stream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = testTable.namespace,
|
||||||
|
name = testTable.name,
|
||||||
|
tableSchema = tableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
tableName = testTable,
|
tableName = testTable,
|
||||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
|
||||||
columnNameMapping = columnNameMapping,
|
columnNameMapping = columnNameMapping,
|
||||||
|
stream = stream,
|
||||||
)
|
)
|
||||||
|
|
||||||
val genId = 17L
|
val genId = 17L
|
||||||
@@ -382,18 +418,36 @@ interface TableOperationsSuite {
|
|||||||
harness.assertTableDoesNotExist(sourceTable)
|
harness.assertTableDoesNotExist(sourceTable)
|
||||||
harness.assertTableDoesNotExist(targetTable)
|
harness.assertTableDoesNotExist(targetTable)
|
||||||
|
|
||||||
|
val sourceTableSchema =
|
||||||
|
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val sourceStream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = sourceTable.namespace,
|
||||||
|
name = sourceTable.name,
|
||||||
|
tableSchema = sourceTableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
|
val targetTableSchema =
|
||||||
|
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val targetStream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = targetTable.namespace,
|
||||||
|
name = targetTable.name,
|
||||||
|
tableSchema = targetTableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
sourceTable,
|
tableName = sourceTable,
|
||||||
Fixtures.TEST_INTEGER_SCHEMA,
|
columnNameMapping = columnNameMapping,
|
||||||
columnNameMapping,
|
stream = sourceStream,
|
||||||
)
|
)
|
||||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||||
|
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
targetTable,
|
tableName = targetTable,
|
||||||
Fixtures.TEST_INTEGER_SCHEMA,
|
columnNameMapping = columnNameMapping,
|
||||||
columnNameMapping,
|
stream = targetStream,
|
||||||
)
|
)
|
||||||
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
||||||
|
|
||||||
@@ -405,7 +459,7 @@ interface TableOperationsSuite {
                 expectedRecords,
                 overwrittenTableRecords.reverseColumnNameMapping(
                     columnNameMapping,
-                    airbyteMetaColumnMapping
+                    airbyteMetaColumnMapping,
                 ),
                 "test",
                 "Expected records were not in the overwritten table.",
@@ -454,18 +508,36 @@ interface TableOperationsSuite {
|
|||||||
harness.assertTableDoesNotExist(sourceTable)
|
harness.assertTableDoesNotExist(sourceTable)
|
||||||
harness.assertTableDoesNotExist(targetTable)
|
harness.assertTableDoesNotExist(targetTable)
|
||||||
|
|
||||||
|
val sourceTableSchema =
|
||||||
|
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val sourceStream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = sourceTable.namespace,
|
||||||
|
name = sourceTable.name,
|
||||||
|
tableSchema = sourceTableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
|
val targetTableSchema =
|
||||||
|
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||||
|
val targetStream =
|
||||||
|
Fixtures.createStream(
|
||||||
|
namespace = targetTable.namespace,
|
||||||
|
name = targetTable.name,
|
||||||
|
tableSchema = targetTableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
sourceTable,
|
tableName = sourceTable,
|
||||||
Fixtures.TEST_INTEGER_SCHEMA,
|
columnNameMapping = columnNameMapping,
|
||||||
columnNameMapping,
|
stream = sourceStream,
|
||||||
)
|
)
|
||||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||||
|
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
targetTable,
|
tableName = targetTable,
|
||||||
Fixtures.TEST_INTEGER_SCHEMA,
|
columnNameMapping = columnNameMapping,
|
||||||
columnNameMapping,
|
stream = targetStream,
|
||||||
)
|
)
|
||||||
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
||||||
|
|
||||||
@@ -477,10 +549,10 @@ interface TableOperationsSuite {
                 expectedRecords,
                 copyTableRecords.reverseColumnNameMapping(
                     columnNameMapping,
-                    airbyteMetaColumnMapping
+                    airbyteMetaColumnMapping,
                 ),
                 "test",
-                "Expected source records were not copied to the target table."
+                "Expected source records were not copied to the target table.",
             )
         } finally {
             harness.cleanupTable(sourceTable)
@@ -520,31 +592,38 @@ interface TableOperationsSuite {
|
|||||||
|
|
||||||
harness.assertTableDoesNotExist(sourceTable)
|
harness.assertTableDoesNotExist(sourceTable)
|
||||||
|
|
||||||
|
val sourceTableSchema =
|
||||||
|
schemaFactory.make(sourceTable, Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties, Append)
|
||||||
val sourceStream =
|
val sourceStream =
|
||||||
Fixtures.createAppendStream(
|
Fixtures.createStream(
|
||||||
namespace = sourceTable.namespace,
|
namespace = sourceTable.namespace,
|
||||||
name = sourceTable.name,
|
name = sourceTable.name,
|
||||||
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
|
tableSchema = sourceTableSchema,
|
||||||
)
|
)
|
||||||
|
|
||||||
val targetTable = Fixtures.generateTestTableName("upsert-test-target-table", testNamespace)
|
val targetTable = Fixtures.generateTestTableName("upsert-test-target-table", testNamespace)
|
||||||
|
|
||||||
harness.assertTableDoesNotExist(targetTable)
|
harness.assertTableDoesNotExist(targetTable)
|
||||||
|
|
||||||
|
val targetTableSchema =
|
||||||
|
schemaFactory.make(
|
||||||
|
targetTable,
|
||||||
|
Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties,
|
||||||
|
Dedupe(
|
||||||
|
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
|
||||||
|
cursor = listOf(Fixtures.TEST_FIELD),
|
||||||
|
),
|
||||||
|
)
|
||||||
val targetStream =
|
val targetStream =
|
||||||
Fixtures.createDedupeStream(
|
Fixtures.createStream(
|
||||||
namespace = targetTable.namespace,
|
namespace = targetTable.namespace,
|
||||||
name = targetTable.name,
|
name = targetTable.name,
|
||||||
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
|
tableSchema = targetTableSchema,
|
||||||
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
|
|
||||||
cursor = listOf(Fixtures.TEST_FIELD),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
try {
|
try {
|
||||||
harness.createTestTableAndVerifyExists(
|
harness.createTestTableAndVerifyExists(
|
||||||
tableName = sourceTable,
|
tableName = sourceTable,
|
||||||
columnNameMapping = columnNameMapping,
|
columnNameMapping = columnNameMapping,
|
||||||
schema = Fixtures.ID_AND_TEST_SCHEMA,
|
|
||||||
stream = sourceStream,
|
stream = sourceStream,
|
||||||
)
|
)
|
||||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||||
@@ -552,7 +631,6 @@ interface TableOperationsSuite {
             harness.createTestTableAndVerifyExists(
                 tableName = targetTable,
                 columnNameMapping = columnNameMapping,
-                schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
                 stream = targetStream,
             )
             harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -565,10 +643,10 @@ interface TableOperationsSuite {
                 expectedRecords,
                 upsertTableRecords.reverseColumnNameMapping(
                     columnNameMapping,
-                    airbyteMetaColumnMapping
+                    airbyteMetaColumnMapping,
                 ),
                 "id",
-                "Upserted table did not contain expected records."
+                "Upserted table did not contain expected records.",
             )
         } finally {
             harness.cleanupTable(sourceTable)
@@ -4,13 +4,24 @@
 
 package io.airbyte.cdk.load.component
 
+import io.airbyte.cdk.load.command.Append
 import io.airbyte.cdk.load.command.DestinationStream
-import io.airbyte.cdk.load.component.TableOperationsFixtures.createAppendStream
+import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
 import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
+import io.airbyte.cdk.load.component.TableOperationsFixtures.removeAirbyteColumns
+import io.airbyte.cdk.load.component.TableOperationsFixtures.removeNulls
+import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
 import io.airbyte.cdk.load.data.AirbyteValue
+import io.airbyte.cdk.load.data.EnrichedAirbyteValue
+import io.airbyte.cdk.load.data.FieldType
+import io.airbyte.cdk.load.data.NullValue
 import io.airbyte.cdk.load.data.ObjectType
+import io.airbyte.cdk.load.dataflow.transform.ValidationResult
+import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
+import io.airbyte.cdk.load.schema.TableSchemaFactory
 import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
+import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
 import io.github.oshai.kotlinlogging.KotlinLogging
 import org.junit.jupiter.api.Assertions.assertEquals
 
@@ -23,20 +34,15 @@ private val log = KotlinLogging.logger {}
 class TableOperationsTestHarness(
     private val client: TableOperationsClient,
     private val testClient: TestTableOperationsClient,
+    private val schemaFactory: TableSchemaFactory,
     private val airbyteMetaColumnMapping: Map<String, String>,
 ) {
 
     /** Creates a test table with the given configuration and verifies it was created. */
     suspend fun createTestTableAndVerifyExists(
         tableName: TableName,
-        schema: ObjectType,
         columnNameMapping: ColumnNameMapping,
-        stream: DestinationStream =
-            createAppendStream(
-                namespace = tableName.namespace,
-                name = tableName.name,
-                schema = schema,
-            )
+        stream: DestinationStream
     ) {
         client.createTable(
             stream = stream,
@@ -108,8 +114,77 @@ class TableOperationsTestHarness(
|
|||||||
/** Reads records from a table, filtering out Meta columns. */
|
/** Reads records from a table, filtering out Meta columns. */
|
||||||
suspend fun readTableWithoutMetaColumns(tableName: TableName): List<Map<String, Any>> {
|
suspend fun readTableWithoutMetaColumns(tableName: TableName): List<Map<String, Any>> {
|
||||||
val tableRead = testClient.readTable(tableName)
|
val tableRead = testClient.readTable(tableName)
|
||||||
return tableRead.map { rec ->
|
return tableRead.removeAirbyteColumns(airbyteMetaColumnMapping)
|
||||||
rec.filter { !airbyteMetaColumnMapping.containsValue(it.key) }
|
}
|
||||||
|
|
||||||
|
/** Apply the coercer to a value and verify that we can write the coerced value correctly */
|
||||||
|
suspend fun testValueCoercion(
|
||||||
|
coercer: ValueCoercer,
|
||||||
|
columnNameMapping: ColumnNameMapping,
|
||||||
|
fieldType: FieldType,
|
||||||
|
inputValue: AirbyteValue,
|
||||||
|
expectedValue: Any?,
|
||||||
|
expectedChangeReason: Reason?,
|
||||||
|
) {
|
||||||
|
val testNamespace = TableOperationsFixtures.generateTestNamespace("test")
|
||||||
|
val tableName =
|
||||||
|
TableOperationsFixtures.generateTestTableName("table-test-table", testNamespace)
|
||||||
|
val schema = ObjectType(linkedMapOf("test" to fieldType))
|
||||||
|
val tableSchema = schemaFactory.make(tableName, schema.properties, Append)
|
||||||
|
val stream =
|
||||||
|
TableOperationsFixtures.createStream(
|
||||||
|
namespace = tableName.namespace,
|
||||||
|
name = tableName.name,
|
||||||
|
tableSchema = tableSchema,
|
||||||
|
)
|
||||||
|
|
||||||
|
val inputValueAsEnrichedAirbyteValue =
|
||||||
|
EnrichedAirbyteValue(
|
||||||
|
inputValue,
|
||||||
|
fieldType.type,
|
||||||
|
"test",
|
||||||
|
airbyteMetaField = null,
|
||||||
|
)
|
||||||
|
val validatedValue = coercer.validate(inputValueAsEnrichedAirbyteValue)
|
||||||
|
val valueToInsert: AirbyteValue
|
||||||
|
val changeReason: Reason?
|
||||||
|
when (validatedValue) {
|
||||||
|
is ValidationResult.ShouldNullify -> {
|
||||||
|
valueToInsert = NullValue
|
||||||
|
changeReason = validatedValue.reason
|
||||||
|
}
|
||||||
|
is ValidationResult.ShouldTruncate -> {
|
||||||
|
valueToInsert = validatedValue.truncatedValue
|
||||||
|
changeReason = validatedValue.reason
|
||||||
|
}
|
||||||
|
ValidationResult.Valid -> {
|
||||||
|
valueToInsert = inputValue
|
||||||
|
changeReason = null
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
client.createNamespace(testNamespace)
|
||||||
|
client.createTable(stream, tableName, columnNameMapping, replace = false)
|
||||||
|
testClient.insertRecords(
|
||||||
|
tableName,
|
||||||
|
columnNameMapping,
|
||||||
|
inputRecord("test" to valueToInsert),
|
||||||
|
)
|
||||||
|
|
||||||
|
val actualRecords =
|
||||||
|
testClient
|
||||||
|
.readTable(tableName)
|
||||||
|
.removeAirbyteColumns(airbyteMetaColumnMapping)
|
||||||
|
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
||||||
|
.removeNulls()
|
||||||
|
val actualValue = actualRecords.first()["test"]
|
||||||
|
assertEquals(
|
||||||
|
expectedValue,
|
||||||
|
actualValue,
|
||||||
|
"For input $inputValue, expected ${expectedValue.simpleClassName()}; actual value was ${actualValue.simpleClassName()}. Coercer output was $validatedValue.",
|
||||||
|
)
|
||||||
|
assertEquals(expectedChangeReason, changeReason)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fun Any?.simpleClassName() = this?.let { it::class.simpleName } ?: "null"
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ import io.airbyte.cdk.load.data.ObjectType
 import io.airbyte.cdk.load.data.StringType
 import io.airbyte.cdk.load.data.StringValue
 import io.airbyte.cdk.load.message.Meta
+import io.airbyte.cdk.load.schema.TableSchemaFactory
 import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
 import io.micronaut.test.extensions.junit5.annotation.MicronautTest
@@ -40,9 +41,16 @@ interface TableSchemaEvolutionSuite {
 
     val opsClient: TableOperationsClient
     val testClient: TestTableOperationsClient
+    val schemaFactory: TableSchemaFactory
 
     private val harness: TableOperationsTestHarness
-        get() = TableOperationsTestHarness(opsClient, testClient, airbyteMetaColumnMapping)
+        get() =
+            TableOperationsTestHarness(
+                opsClient,
+                testClient,
+                schemaFactory,
+                airbyteMetaColumnMapping
+            )
 
     /**
      * Test that the connector can correctly discover all of its own data types. This test creates a
@@ -61,11 +69,13 @@ interface TableSchemaEvolutionSuite {
     ) = runTest {
         val testNamespace = Fixtures.generateTestNamespace("namespace-test")
         val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
+        val tableSchema =
+            schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
         val stream =
-            Fixtures.createAppendStream(
+            Fixtures.createStream(
                 namespace = testTable.namespace,
                 name = testTable.name,
-                schema = Fixtures.ALL_TYPES_SCHEMA,
+                tableSchema = tableSchema,
             )
 
         opsClient.createNamespace(testNamespace)
@@ -97,11 +107,13 @@ interface TableSchemaEvolutionSuite {
     ) {
         val testNamespace = Fixtures.generateTestNamespace("namespace-test")
         val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
+        val tableSchema =
+            schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
         val stream =
-            Fixtures.createAppendStream(
+            Fixtures.createStream(
                 namespace = testTable.namespace,
                 name = testTable.name,
-                schema = Fixtures.ALL_TYPES_SCHEMA,
+                tableSchema = tableSchema,
             )
         val computedSchema = client.computeSchema(stream, columnNameMapping)
         assertEquals(expectedComputedSchema, computedSchema)
@@ -374,12 +386,13 @@ interface TableSchemaEvolutionSuite {
                     "to_drop" to FieldType(StringType, true),
                 ),
             )
+        val initialTableSchema =
+            schemaFactory.make(testTable, initialSchema.properties, initialStreamImportType)
         val initialStream =
             Fixtures.createStream(
                 testTable.namespace,
                 testTable.name,
-                initialSchema,
-                initialStreamImportType,
+                initialTableSchema,
             )
         val modifiedSchema =
             ObjectType(
@@ -391,12 +404,13 @@ interface TableSchemaEvolutionSuite {
                     "to_add" to FieldType(StringType, true),
                 ),
             )
+        val modifiedTableSchema =
+            schemaFactory.make(testTable, modifiedSchema.properties, modifiedStreamImportType)
         val modifiedStream =
             Fixtures.createStream(
                 testTable.namespace,
                 testTable.name,
-                modifiedSchema,
-                modifiedStreamImportType,
+                modifiedTableSchema,
             )
 
         // Create the table and compute the schema changeset
@@ -548,16 +562,16 @@ interface TableSchemaEvolutionSuite {
         modifiedSchema: ObjectType,
         modifiedColumnNameMapping: ColumnNameMapping,
         initialStream: DestinationStream =
-            Fixtures.createAppendStream(
+            Fixtures.createStream(
                 namespace = testTable.namespace,
                 name = testTable.name,
-                schema = initialSchema,
+                tableSchema = schemaFactory.make(testTable, initialSchema.properties, Append),
             ),
         modifiedStream: DestinationStream =
-            Fixtures.createAppendStream(
+            Fixtures.createStream(
                 namespace = testTable.namespace,
                 name = testTable.name,
-                schema = modifiedSchema,
+                tableSchema = schemaFactory.make(testTable, modifiedSchema.properties, Append),
             ),
     ): SchemaEvolutionComputation {
         opsClient.createNamespace(testTable.namespace)
@@ -1 +1 @@
-version=0.1.85
+version=0.1.91
@@ -10,5 +10,6 @@ CONNECTOR_PATH_PREFIXES = {
     "airbyte-integrations/connectors",
     "docs/integrations/sources",
     "docs/integrations/destinations",
+    "docs/ai-agents/connectors",
 }
 MERGE_METHOD = "squash"
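The prefix set above gates which changed file paths are treated as connector-related. A minimal sketch of how such a gate is typically consulted; the helper name and usage are illustrative only and not part of this change:

```python
# Hypothetical helper, not from the PR: shows how a path-prefix set like
# CONNECTOR_PATH_PREFIXES is usually applied to a list of changed files.
CONNECTOR_PATH_PREFIXES = {
    "airbyte-integrations/connectors",
    "docs/integrations/sources",
    "docs/integrations/destinations",
    "docs/ai-agents/connectors",
}

def touches_connector_files(changed_paths: list[str]) -> bool:
    """Return True if any changed path falls under a connector-related prefix."""
    return any(
        path.startswith(prefix)
        for path in changed_paths
        for prefix in CONNECTOR_PATH_PREFIXES
    )

# With the newly added prefix, a docs-only AI-agents connector page now qualifies.
print(touches_connector_files(["docs/ai-agents/connectors/source-stripe.md"]))  # True
```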
@@ -75,7 +75,7 @@ This will copy the specified connector version to your development bucket. This
 _💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage/docs/gsutil) installed and have run `gsutil auth login`_
 
 ```bash
-TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe copy-connector-from-prod
+TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-preview.ea013c8" poetry run poe copy-connector-from-prod
 ```
 
 ### Promote Connector Version to Latest
@@ -87,5 +87,5 @@ _💡 Note: A prerequisite is you have [gsutil](https://cloud.google.com/storage
 _⚠️ Warning: Its important to know that this will remove ANY existing files in the latest folder that are not in the versioned folder as it calls `gsutil rsync` with `-d` enabled._
 
 ```bash
-TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-dev.ea013c8741" poetry run poe promote-connector-to-latest
+TARGET_BUCKET=<YOUR-DEV_BUCKET> CONNECTOR="airbyte/source-stripe" VERSION="3.17.0-preview.ea013c8" poetry run poe promote-connector-to-latest
 ```
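The warning above is about `gsutil rsync -d`: anything under the `latest/` folder that is absent from the versioned folder gets deleted. A minimal sketch of what such a promotion step could look like, assuming hypothetical bucket paths and a helper name that are not taken from the real poe task:

```python
import subprocess

def promote_to_latest(bucket: str, connector: str, version: str) -> None:
    """Mirror a versioned metadata folder onto the 'latest' folder.

    The -d flag makes rsync delete files present in the destination but not
    in the source, which is exactly what the warning above cautions about.
    Paths below are illustrative assumptions, not the actual layout.
    """
    src = f"gs://{bucket}/metadata/{connector}/{version}/"
    dst = f"gs://{bucket}/metadata/{connector}/latest/"
    subprocess.run(["gsutil", "-m", "rsync", "-d", "-r", src, dst], check=True)

# promote_to_latest("my-dev-bucket", "airbyte/source-stripe", "3.17.0-preview.ea013c8")
```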
@@ -28,8 +28,8 @@ def get_docker_hub_auth_token() -> str:
 
 
 def get_docker_hub_headers() -> Dict | None:
-    if "DOCKER_HUB_USERNAME" not in os.environ or "DOCKER_HUB_PASSWORD" not in os.environ:
-        # If the Docker Hub credentials are not provided, we can only anonymously call the Docker Hub API.
+    if not os.environ.get("DOCKER_HUB_USERNAME") or not os.environ.get("DOCKER_HUB_PASSWORD"):
+        # If the Docker Hub credentials are not provided (or are empty), we can only anonymously call the Docker Hub API.
         # This will only work for public images and lead to a lower rate limit.
         return {}
     else:
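The change above treats an empty credential the same as a missing one. A small standalone sketch isolating that distinction; the function below is illustrative and not the connector pipeline code:

```python
import os

def docker_hub_credentials() -> tuple[str, str] | None:
    """Return (username, password) only when both are set and non-empty.

    '"VAR" in os.environ' is True even when the variable is set to "",
    whereas os.environ.get("VAR") is falsy for both missing and empty
    values - the behavior the diff above switches to.
    """
    username = os.environ.get("DOCKER_HUB_USERNAME")
    password = os.environ.get("DOCKER_HUB_PASSWORD")
    if not username or not password:
        return None  # caller falls back to anonymous, lower-rate-limit access
    return username, password
```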
@@ -434,7 +434,7 @@ def generate_and_persist_registry_entry(
         bucket_name (str): The name of the GCS bucket.
         repo_metadata_file_path (pathlib.Path): The path to the spec file.
         registry_type (str): The registry type.
-        docker_image_tag (str): The docker image tag associated with this release. Typically a semver string (e.g. '1.2.3'), possibly with a suffix (e.g. '1.2.3-dev.abcde12345')
+        docker_image_tag (str): The docker image tag associated with this release. Typically a semver string (e.g. '1.2.3'), possibly with a suffix (e.g. '1.2.3-preview.abcde12')
         is_prerelease (bool): Whether this is a prerelease, or a main release.
     """
     # Read the repo metadata dict to bootstrap ourselves. We need the docker repository,
@@ -444,7 +444,7 @@ def generate_and_persist_registry_entry(
 
     try:
         # Now that we have the docker repo, read the appropriate versioned metadata from GCS.
-        # This metadata will differ in a few fields (e.g. in prerelease mode, dockerImageTag will contain the actual prerelease tag `1.2.3-dev.abcde12345`),
+        # This metadata will differ in a few fields (e.g. in prerelease mode, dockerImageTag will contain the actual prerelease tag `1.2.3-preview.abcde12`),
         # so we'll treat this as the source of truth (ish. See below for how we handle the registryOverrides field.)
         gcs_client = get_gcs_storage_client(gcs_creds=os.environ.get("GCS_CREDENTIALS"))
         bucket = gcs_client.bucket(bucket_name)
@@ -533,7 +533,9 @@ def generate_and_persist_registry_entry(
 
     # For latest versions that are disabled, delete any existing registry entry to remove it from the registry
     if (
-        "-rc" not in metadata_dict["data"]["dockerImageTag"] and "-dev" not in metadata_dict["data"]["dockerImageTag"]
+        "-rc" not in metadata_dict["data"]["dockerImageTag"]
+        and "-dev" not in metadata_dict["data"]["dockerImageTag"]
+        and "-preview" not in metadata_dict["data"]["dockerImageTag"]
     ) and not metadata_dict["data"]["registryOverrides"][registry_type]["enabled"]:
         logger.info(
             f"{registry_type} is not enabled: deleting existing {registry_type} registry entry for {metadata_dict['data']['dockerRepository']} at latest path."
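The condition above now recognizes three suffixes (`-rc`, `-dev`, `-preview`) as tags that should never overwrite the latest registry entry. A hedged sketch of the same check factored into a helper; the function name is hypothetical and does not exist in the PR:

```python
# Illustrative only: the actual code keeps this check inline, as shown above.
PRERELEASE_MARKERS = ("-rc", "-dev", "-preview")

def is_latest_eligible_tag(docker_image_tag: str) -> bool:
    """True only for plain semver tags with no RC/dev/preview marker."""
    return not any(marker in docker_image_tag for marker in PRERELEASE_MARKERS)

assert is_latest_eligible_tag("2.1.18")
assert not is_latest_eligible_tag("2.1.16-rc.1")
assert not is_latest_eligible_tag("2.0.0-preview.cf3628c")
```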
@@ -5,7 +5,7 @@ data:
   connectorType: source
   dockerRepository: airbyte/image-exists-1
   githubIssueLabel: source-alloydb-strict-encrypt
-  dockerImageTag: 2.0.0-dev.cf3628ccf3
+  dockerImageTag: 2.0.0-preview.cf3628c
   documentationUrl: https://docs.airbyte.com/integrations/sources/existingsource
   connectorSubtype: database
   releaseStage: generally_available
@@ -231,7 +231,7 @@ def test_upload_prerelease(mocker, valid_metadata_yaml_files, tmp_path):
     mocker.patch.object(commands.click, "secho")
     mocker.patch.object(commands, "upload_metadata_to_gcs")
 
-    prerelease_tag = "0.3.0-dev.6d33165120"
+    prerelease_tag = "0.3.0-preview.6d33165"
     bucket = "my-bucket"
     metadata_file_path = valid_metadata_yaml_files[0]
     validator_opts = ValidatorOptions(docs_path=str(tmp_path), prerelease_tag=prerelease_tag)
@@ -582,7 +582,7 @@ def test_upload_metadata_to_gcs_invalid_docker_images(mocker, invalid_metadata_u
 def test_upload_metadata_to_gcs_with_prerelease(mocker, valid_metadata_upload_files, tmp_path):
     mocker.spy(gcs_upload, "_file_upload")
     mocker.spy(gcs_upload, "upload_file_if_changed")
-    prerelease_image_tag = "1.5.6-dev.f80318f754"
+    prerelease_image_tag = "1.5.6-preview.f80318f"
 
     for valid_metadata_upload_file in valid_metadata_upload_files:
         tmp_metadata_file_path = tmp_path / "metadata.yaml"
@@ -701,7 +701,7 @@ def test_upload_metadata_to_gcs_release_candidate(mocker, get_fixture_path, tmp_
     )
     assert metadata.data.releases.rolloutConfiguration.enableProgressiveRollout
 
-    prerelease_tag = "1.5.6-dev.f80318f754" if prerelease else None
+    prerelease_tag = "1.5.6-preview.f80318f" if prerelease else None
 
     upload_info = gcs_upload.upload_metadata_to_gcs(
         "my_bucket",
@@ -110,14 +110,14 @@ class PublishConnectorContext(ConnectorContext):
 
     @property
     def pre_release_suffix(self) -> str:
-        return self.git_revision[:10]
+        return self.git_revision[:7]
 
     @property
     def docker_image_tag(self) -> str:
         # get the docker image tag from the parent class
         metadata_tag = super().docker_image_tag
         if self.pre_release:
-            return f"{metadata_tag}-dev.{self.pre_release_suffix}"
+            return f"{metadata_tag}-preview.{self.pre_release_suffix}"
         else:
             return metadata_tag
 
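Putting the two properties above together: the pre-release image tag is now the metadata version plus the first seven characters of the git revision, joined by `-preview.`. A standalone sketch of that composition, for illustration only:

```python
def preview_image_tag(metadata_tag: str, git_revision: str) -> str:
    """Compose a pre-release tag the way PublishConnectorContext now does:
    truncate the revision to 7 characters and join with '-preview.'."""
    return f"{metadata_tag}-preview.{git_revision[:7]}"

print(preview_image_tag("3.17.0", "ea013c8741deadbeef"))  # 3.17.0-preview.ea013c8
```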
@@ -25,7 +25,7 @@ from pipelines.helpers.utils import raise_if_not_user
 from pipelines.models.steps import STEP_PARAMS, Step, StepResult
 
 # Pin the PyAirbyte version to avoid updates from breaking CI
-PYAIRBYTE_VERSION = "0.20.2"
+PYAIRBYTE_VERSION = "0.35.1"
 
 
 class PytestStep(Step, ABC):
@@ -156,7 +156,8 @@ class TestPyAirbyteValidationTests:
         result = await PyAirbyteValidation(context_for_valid_connector)._run(mocker.MagicMock())
         assert isinstance(result, StepResult)
         assert result.status == StepStatus.SUCCESS
-        assert "Getting `spec` output from connector..." in result.stdout
+        # Verify the connector name appears in output (stable across PyAirbyte versions)
+        assert context_for_valid_connector.connector.technical_name in (result.stdout + result.stderr)
 
     async def test__run_validation_skip_unpublished_connector(
         self,
@@ -1,2 +1,2 @@
-cdkVersion=0.1.84
+cdkVersion=0.1.89
 JunitMethodExecutionTimeout=10m
@@ -2,7 +2,7 @@ data:
   connectorSubtype: database
   connectorType: destination
   definitionId: ce0d828e-1dc4-496c-b122-2da42e637e48
-  dockerImageTag: 2.1.16-rc.1
+  dockerImageTag: 2.1.18
   dockerRepository: airbyte/destination-clickhouse
   githubIssueLabel: destination-clickhouse
   icon: clickhouse.svg
@@ -27,7 +27,7 @@ data:
   releaseStage: generally_available
   releases:
     rolloutConfiguration:
-      enableProgressiveRollout: true
+      enableProgressiveRollout: false
     breakingChanges:
       2.0.0:
         message: "This connector has been re-written from scratch. Data will now be typed and stored in final (non-raw) tables. The connector may require changes to its configuration to function properly and downstream pipelines may be affected. Warning: SSH tunneling is in Beta."
@@ -7,26 +7,24 @@ package io.airbyte.integrations.destination.clickhouse.client
 import com.clickhouse.client.api.Client as ClickHouseClientRaw
 import com.clickhouse.client.api.command.CommandResponse
 import com.clickhouse.client.api.data_formats.ClickHouseBinaryFormatReader
-import com.clickhouse.client.api.metadata.TableSchema
 import com.clickhouse.client.api.query.QueryResponse
 import com.clickhouse.data.ClickHouseColumn
 import com.clickhouse.data.ClickHouseDataType
 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
 import io.airbyte.cdk.ConfigErrorException
-import io.airbyte.cdk.load.command.Dedupe
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.component.ColumnChangeset
 import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.component.TableColumns
 import io.airbyte.cdk.load.component.TableOperationsClient
+import io.airbyte.cdk.load.component.TableSchema
 import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAMES
 import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
 import io.airbyte.cdk.load.table.TempTableNameGenerator
-import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
-import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
-import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
+import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.DATETIME_WITH_PRECISION
+import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE
 import io.github.oshai.kotlinlogging.KotlinLogging
 import jakarta.inject.Singleton
 import kotlinx.coroutines.future.await
@@ -42,7 +40,6 @@ class ClickhouseAirbyteClient(
     private val client: ClickHouseClientRaw,
     private val sqlGenerator: ClickhouseSqlGenerator,
     private val tempTableNameGenerator: TempTableNameGenerator,
-    private val clickhouseConfiguration: ClickhouseConfiguration,
 ) : TableOperationsClient, TableSchemaEvolutionClient {
 
     override suspend fun createNamespace(namespace: String) {
@@ -59,9 +56,8 @@ class ClickhouseAirbyteClient(
     ) {
         execute(
             sqlGenerator.createTable(
-                stream,
                 tableName,
-                columnNameMapping,
+                stream.tableSchema,
                 replace,
             ),
         )
@@ -81,9 +77,10 @@ class ClickhouseAirbyteClient(
         sourceTableName: TableName,
         targetTableName: TableName
     ) {
+        val columnNames = columnNameMapping.values.toSet()
         execute(
             sqlGenerator.copyTable(
-                columnNameMapping,
+                columnNames,
                 sourceTableName,
                 targetTableName,
             ),
@@ -99,10 +96,8 @@ class ClickhouseAirbyteClient(
         throw NotImplementedError("We rely on Clickhouse's table engine for deduping")
     }
 
-    override suspend fun discoverSchema(
-        tableName: TableName
-    ): io.airbyte.cdk.load.component.TableSchema {
-        val tableSchema: TableSchema = client.getTableSchema(tableName.name, tableName.namespace)
+    override suspend fun discoverSchema(tableName: TableName): TableSchema {
+        val tableSchema = client.getTableSchema(tableName.name, tableName.namespace)
 
         log.info { "Fetch the clickhouse table schema: $tableSchema" }
 
@@ -121,7 +116,7 @@ class ClickhouseAirbyteClient(
 
         log.info { "Found Clickhouse columns: $tableSchemaWithoutAirbyteColumns" }
 
-        return io.airbyte.cdk.load.component.TableSchema(
+        return TableSchema(
             tableSchemaWithoutAirbyteColumns.associate {
                 it.columnName to ColumnType(it.dataType.getDataTypeAsString(), it.isNullable)
             },
@@ -131,42 +126,8 @@ class ClickhouseAirbyteClient(
|
|||||||
override fun computeSchema(
|
override fun computeSchema(
|
||||||
stream: DestinationStream,
|
stream: DestinationStream,
|
||||||
columnNameMapping: ColumnNameMapping
|
columnNameMapping: ColumnNameMapping
|
||||||
): io.airbyte.cdk.load.component.TableSchema {
|
): TableSchema {
|
||||||
val importType = stream.importType
|
return TableSchema(stream.tableSchema.columnSchema.finalSchema)
|
||||||
val primaryKey =
|
|
||||||
if (importType is Dedupe) {
|
|
||||||
sqlGenerator.extractPks(importType.primaryKey, columnNameMapping).toSet()
|
|
||||||
} else {
|
|
||||||
emptySet()
|
|
||||||
}
|
|
||||||
val cursor =
|
|
||||||
if (importType is Dedupe) {
|
|
||||||
if (importType.cursor.size > 1) {
|
|
||||||
throw ConfigErrorException(
|
|
||||||
"Only top-level cursors are supported. Got ${importType.cursor}"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
importType.cursor.map { columnNameMapping[it] }.toSet()
|
|
||||||
} else {
|
|
||||||
emptySet()
|
|
||||||
}
|
|
||||||
return io.airbyte.cdk.load.component.TableSchema(
|
|
||||||
stream.schema
|
|
||||||
.asColumns()
|
|
||||||
.map { (fieldName, fieldType) ->
|
|
||||||
val clickhouseCompatibleName = columnNameMapping[fieldName]!!
|
|
||||||
val nullable =
|
|
||||||
!primaryKey.contains(clickhouseCompatibleName) &&
|
|
||||||
!cursor.contains(clickhouseCompatibleName)
|
|
||||||
val type = fieldType.type.toDialectType(clickhouseConfiguration.enableJson)
|
|
||||||
clickhouseCompatibleName to
|
|
||||||
ColumnType(
|
|
||||||
type = type,
|
|
||||||
nullable = nullable,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
.toMap(),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
override suspend fun applyChangeset(
|
override suspend fun applyChangeset(
|
||||||
@@ -194,7 +155,6 @@ class ClickhouseAirbyteClient(
                 applyDeduplicationChanges(
                     stream,
                     tableName,
-                    columnNameMapping,
                     columnChangeset,
                 )
             } else if (!columnChangeset.isNoop()) {
@@ -205,42 +165,28 @@ class ClickhouseAirbyteClient(
|
|||||||
private suspend fun applyDeduplicationChanges(
|
private suspend fun applyDeduplicationChanges(
|
||||||
stream: DestinationStream,
|
stream: DestinationStream,
|
||||||
properTableName: TableName,
|
properTableName: TableName,
|
||||||
columnNameMapping: ColumnNameMapping,
|
|
||||||
columnChangeset: ColumnChangeset,
|
columnChangeset: ColumnChangeset,
|
||||||
) {
|
) {
|
||||||
val tempTableName = tempTableNameGenerator.generate(properTableName)
|
val tempTableName = tempTableNameGenerator.generate(properTableName)
|
||||||
execute(sqlGenerator.createNamespace(tempTableName.namespace))
|
execute(sqlGenerator.createNamespace(tempTableName.namespace))
|
||||||
execute(
|
execute(
|
||||||
sqlGenerator.createTable(
|
sqlGenerator.createTable(
|
||||||
stream,
|
|
||||||
tempTableName,
|
tempTableName,
|
||||||
columnNameMapping,
|
stream.tableSchema,
|
||||||
true,
|
true,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
copyIntersectionColumn(
|
val columnNames =
|
||||||
columnChangeset.columnsToChange.keys + columnChangeset.columnsToRetain.keys,
|
columnChangeset.columnsToChange.keys + columnChangeset.columnsToRetain.keys
|
||||||
columnNameMapping,
|
|
||||||
properTableName,
|
|
||||||
tempTableName
|
|
||||||
)
|
|
||||||
execute(sqlGenerator.exchangeTable(tempTableName, properTableName))
|
|
||||||
execute(sqlGenerator.dropTable(tempTableName))
|
|
||||||
}
|
|
||||||
|
|
||||||
internal suspend fun copyIntersectionColumn(
|
|
||||||
columnsToCopy: Set<String>,
|
|
||||||
columnNameMapping: ColumnNameMapping,
|
|
||||||
properTableName: TableName,
|
|
||||||
tempTableName: TableName
|
|
||||||
) {
|
|
||||||
execute(
|
execute(
|
||||||
sqlGenerator.copyTable(
|
sqlGenerator.copyTable(
|
||||||
ColumnNameMapping(columnNameMapping.filter { columnsToCopy.contains(it.value) }),
|
columnNames,
|
||||||
properTableName,
|
properTableName,
|
||||||
tempTableName,
|
tempTableName,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
execute(sqlGenerator.exchangeTable(tempTableName, properTableName))
|
||||||
|
execute(sqlGenerator.dropTable(tempTableName))
|
||||||
}
|
}
|
||||||
|
|
||||||
override suspend fun countTable(tableName: TableName): Long? {
|
override suspend fun countTable(tableName: TableName): Long? {
|
||||||
@@ -251,7 +197,7 @@ class ClickhouseAirbyteClient(
             reader.next()
             val count = reader.getLong("cnt")
             return count
-        } catch (e: Exception) {
+        } catch (_: Exception) {
             return null
         }
     }
@@ -280,12 +226,16 @@ class ClickhouseAirbyteClient(
     }
 
     private fun ClickHouseDataType.getDataTypeAsString(): String {
-        return if (this.name == "DateTime64") {
-            DATETIME_WITH_PRECISION
-        } else if (this.name == "Decimal") {
-            DECIMAL_WITH_PRECISION_AND_SCALE
-        } else {
-            this.name
+        return when (this.name) {
+            "DateTime64" -> {
+                DATETIME_WITH_PRECISION
+            }
+            "Decimal" -> {
+                DECIMAL_WITH_PRECISION_AND_SCALE
+            }
+            else -> {
+                this.name
+            }
         }
     }
 
|||||||
@@ -4,136 +4,62 @@
|
|||||||
|
|
||||||
package io.airbyte.integrations.destination.clickhouse.client
|
package io.airbyte.integrations.destination.clickhouse.client
|
||||||
|
|
||||||
import com.clickhouse.data.ClickHouseDataType
|
|
||||||
import io.airbyte.cdk.load.command.Dedupe
|
import io.airbyte.cdk.load.command.Dedupe
|
||||||
import io.airbyte.cdk.load.command.DestinationStream
|
|
||||||
import io.airbyte.cdk.load.component.ColumnChangeset
|
import io.airbyte.cdk.load.component.ColumnChangeset
|
||||||
import io.airbyte.cdk.load.component.ColumnType
|
import io.airbyte.cdk.load.component.ColumnType
|
||||||
import io.airbyte.cdk.load.data.AirbyteType
|
|
||||||
import io.airbyte.cdk.load.data.ArrayType
|
|
||||||
import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
|
|
||||||
import io.airbyte.cdk.load.data.BooleanType
|
|
||||||
import io.airbyte.cdk.load.data.DateType
|
|
||||||
import io.airbyte.cdk.load.data.IntegerType
|
|
||||||
import io.airbyte.cdk.load.data.NumberType
|
|
||||||
import io.airbyte.cdk.load.data.ObjectType
|
|
||||||
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
|
|
||||||
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
|
|
||||||
import io.airbyte.cdk.load.data.StringType
|
|
||||||
import io.airbyte.cdk.load.data.TimeTypeWithTimezone
|
|
||||||
import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
|
|
||||||
import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
|
|
||||||
import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
|
|
||||||
import io.airbyte.cdk.load.data.UnionType
|
|
||||||
import io.airbyte.cdk.load.data.UnknownType
|
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||||
|
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||||
import io.airbyte.cdk.load.schema.model.TableName
|
import io.airbyte.cdk.load.schema.model.TableName
|
||||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
|
||||||
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
|
|
||||||
import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
|
|
||||||
import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
|
|
||||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||||
import jakarta.inject.Singleton
|
import jakarta.inject.Singleton
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
class ClickhouseSqlGenerator(
|
class ClickhouseSqlGenerator {
|
||||||
val clickhouseConfiguration: ClickhouseConfiguration,
|
|
||||||
) {
|
|
||||||
private val log = KotlinLogging.logger {}
|
private val log = KotlinLogging.logger {}
|
||||||
|
|
||||||
/**
|
|
||||||
* This extension is here to avoid writing `.also { log.info { it }}` for every returned string
|
|
||||||
* we want to log
|
|
||||||
*/
|
|
||||||
private fun String.andLog(): String {
|
|
||||||
log.info { this }
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun isValidVersionColumnType(airbyteType: AirbyteType): Boolean {
|
|
||||||
// Must be of an integer type or of type Date/DateTime/DateTime64
|
|
||||||
return VALID_VERSION_COLUMN_TYPES.any { it.isInstance(airbyteType) }
|
|
||||||
}
|
|
||||||
|
|
||||||
fun createNamespace(namespace: String): String {
|
fun createNamespace(namespace: String): String {
|
||||||
return "CREATE DATABASE IF NOT EXISTS `$namespace`;".andLog()
|
return "CREATE DATABASE IF NOT EXISTS `$namespace`;".andLog()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun createTable(
|
fun createTable(
|
||||||
stream: DestinationStream,
|
|
||||||
tableName: TableName,
|
tableName: TableName,
|
||||||
columnNameMapping: ColumnNameMapping,
|
tableSchema: StreamTableSchema,
|
||||||
replace: Boolean,
|
replace: Boolean,
|
||||||
): String {
|
): String {
|
||||||
val pks: List<String> =
|
val forceCreateTable = if (replace) "OR REPLACE" else ""
|
||||||
when (stream.importType) {
|
|
||||||
is Dedupe -> extractPks((stream.importType as Dedupe).primaryKey, columnNameMapping)
|
|
||||||
else -> listOf()
|
|
||||||
}
|
|
||||||
|
|
||||||
// For ReplacingMergeTree, we need to make the cursor column non-nullable if it's used as
|
val finalSchema = tableSchema.columnSchema.finalSchema
|
||||||
// version column. We'll also determine here if we need to fall back to extracted_at.
|
val columnDeclarations =
|
||||||
var useCursorAsVersionColumn = false
|
finalSchema
|
||||||
val nonNullableColumns =
|
.map { (columnName, columnType) -> "`$columnName` ${columnType.typeDecl()}" }
|
||||||
mutableSetOf<String>().apply {
|
.joinToString(",\n")
|
||||||
addAll(pks) // Primary keys are always non-nullable
|
|
||||||
if (stream.importType is Dedupe) {
|
|
||||||
val dedupeType = stream.importType as Dedupe
|
|
||||||
if (dedupeType.cursor.isNotEmpty()) {
|
|
||||||
val cursorFieldName = dedupeType.cursor.first()
|
|
||||||
val cursorColumnName = columnNameMapping[cursorFieldName] ?: cursorFieldName
|
|
||||||
|
|
||||||
// Check if the cursor column type is valid for ClickHouse
|
val orderBy =
|
||||||
// ReplacingMergeTree
|
if (tableSchema.importType !is Dedupe) {
|
||||||
val cursorColumnType = stream.schema.asColumns()[cursorFieldName]?.type
|
COLUMN_NAME_AB_RAW_ID
|
||||||
if (
|
} else {
|
||||||
cursorColumnType != null && isValidVersionColumnType(cursorColumnType)
|
val pks = flattenPks(tableSchema.getPrimaryKey())
|
||||||
) {
|
pks.joinToString(",") {
|
||||||
// Cursor column is valid, use it as version column
|
// Escape the columns
|
||||||
add(cursorColumnName) // Make cursor column non-nullable too
|
"`$it`"
|
||||||
useCursorAsVersionColumn = true
|
|
||||||
} else {
|
|
||||||
// Cursor column is invalid, we'll fall back to _airbyte_extracted_at
|
|
||||||
log.warn {
|
|
||||||
"Cursor column '$cursorFieldName' for stream '${stream.mappedDescriptor}' has type '${cursorColumnType?.let { it::class.simpleName }}' which is not valid for use as a version column in ClickHouse ReplacingMergeTree. " +
|
|
||||||
"Falling back to using _airbyte_extracted_at as version column. Valid types are: Integer, Date, Timestamp."
|
|
||||||
}
|
|
||||||
useCursorAsVersionColumn = false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If no cursor is specified or cursor is invalid, we'll use
|
|
||||||
// _airbyte_extracted_at
|
|
||||||
// as version column, which is already non-nullable by default (defined in
|
|
||||||
// CREATE TABLE statement)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val columnDeclarations =
|
|
||||||
columnsAndTypes(stream, columnNameMapping, nonNullableColumns.toList())
|
|
||||||
|
|
||||||
val forceCreateTable = if (replace) "OR REPLACE" else ""
|
|
||||||
|
|
||||||
val pksAsString =
|
|
||||||
pks.joinToString(",") {
|
|
||||||
// Escape the columns
|
|
||||||
"`$it`"
|
|
||||||
}
|
|
||||||
|
|
||||||
val engine =
|
val engine =
|
||||||
when (stream.importType) {
|
when (tableSchema.importType) {
|
||||||
is Dedupe -> {
|
is Dedupe -> {
|
||||||
val dedupeType = stream.importType as Dedupe
|
// Check if cursor column type is valid for ClickHouse ReplacingMergeTree
|
||||||
// Use cursor column as version column for ReplacingMergeTree if available and
|
val cursor = tableSchema.getCursor().firstOrNull()
|
||||||
// valid
|
val cursorType = cursor?.let { finalSchema[it]?.type }
|
||||||
|
|
||||||
|
val useCursorAsVersion =
|
||||||
|
cursorType != null && isValidVersionColumn(cursor, cursorType)
|
||||||
 val versionColumn =
-    if (dedupeType.cursor.isNotEmpty() && useCursorAsVersionColumn) {
-        val cursorFieldName = dedupeType.cursor.first()
-        val cursorColumnName =
-            columnNameMapping[cursorFieldName] ?: cursorFieldName
-        "`$cursorColumnName`"
+    if (useCursorAsVersion) {
+        "`$cursor`"
     } else {
         // Fallback to _airbyte_extracted_at if no cursor is specified or cursor
         // is invalid
@@ -152,33 +78,13 @@ class ClickhouseSqlGenerator(
     $COLUMN_NAME_AB_GENERATION_ID UInt32 NOT NULL,
     $columnDeclarations
 )
-ENGINE = ${engine}
-ORDER BY (${if (pks.isEmpty()) {
-    "$COLUMN_NAME_AB_RAW_ID"
-} else {
-    pksAsString
-}})
+ENGINE = $engine
+ORDER BY ($orderBy)
 """
     .trimIndent()
     .andLog()
 }

-internal fun extractPks(
-    primaryKey: List<List<String>>,
-    columnNameMapping: ColumnNameMapping
-): List<String> {
-    return primaryKey.map { fieldPath ->
-        if (fieldPath.size != 1) {
-            throw UnsupportedOperationException(
-                "Only top-level primary keys are supported, got $fieldPath",
-            )
-        }
-        val fieldName = fieldPath.first()
-        val columnName = columnNameMapping[fieldName] ?: fieldName
-        columnName
-    }
-}
-
 fun dropTable(tableName: TableName): String =
     "DROP TABLE IF EXISTS `${tableName.namespace}`.`${tableName.name}`;".andLog()

@@ -191,11 +97,11 @@ class ClickhouseSqlGenerator(
     .andLog()

 fun copyTable(
-    columnNameMapping: ColumnNameMapping,
+    columnNames: Set<String>,
     sourceTableName: TableName,
     targetTableName: TableName,
 ): String {
-    val columnNames = columnNameMapping.map { (_, actualName) -> actualName }.joinToString(",")
+    val joinedNames = columnNames.joinToString(",")
     // TODO can we use CDK builtin stuff instead of hardcoding the airbyte meta columns?
     return """
     INSERT INTO `${targetTableName.namespace}`.`${targetTableName.name}`
@@ -204,79 +110,20 @@ class ClickhouseSqlGenerator(
     $COLUMN_NAME_AB_EXTRACTED_AT,
     $COLUMN_NAME_AB_META,
     $COLUMN_NAME_AB_GENERATION_ID,
-    $columnNames
+    $joinedNames
 )
 SELECT
     $COLUMN_NAME_AB_RAW_ID,
     $COLUMN_NAME_AB_EXTRACTED_AT,
     $COLUMN_NAME_AB_META,
     $COLUMN_NAME_AB_GENERATION_ID,
-    $columnNames
+    $joinedNames
 FROM `${sourceTableName.namespace}`.`${sourceTableName.name}`
 """
     .trimIndent()
     .andLog()
 }

-/**
- * A SQL SELECT statement that extracts records from the table and dedupes the records (since we
- * only need the most-recent record to upsert).
- */
-private fun selectDedupedRecords(
-    stream: DestinationStream,
-    sourceTableName: TableName,
-    columnNameMapping: ColumnNameMapping,
-): String {
-    val columnList: String =
-        stream.schema.asColumns().keys.joinToString("\n") { fieldName ->
-            val columnName = columnNameMapping[fieldName]!!
-            "`$columnName`,"
-        }
-
-    val importType = stream.importType as Dedupe
-
-    // We need to dedupe the records. Note the row_number() invocation in
-    // the SQL statement. We only take the most-recent raw record for each PK.
-    val pkList =
-        importType.primaryKey.joinToString(",") { fieldName ->
-            val columnName = columnNameMapping[fieldName.first()]!!
-            "`$columnName`"
-        }
-    val cursorOrderClause =
-        if (importType.cursor.isEmpty()) {
-            ""
-        } else if (importType.cursor.size == 1) {
-            val columnName = columnNameMapping[importType.cursor.first()]!!
-            "`$columnName` DESC NULLS LAST,"
-        } else {
-            throw UnsupportedOperationException(
-                "Only top-level cursors are supported, got ${importType.cursor}",
-            )
-        }
-
-    return """
-    WITH records AS (
-        SELECT
-            $columnList
-            $COLUMN_NAME_AB_META,
-            $COLUMN_NAME_AB_RAW_ID,
-            $COLUMN_NAME_AB_EXTRACTED_AT,
-            $COLUMN_NAME_AB_GENERATION_ID
-        FROM `${sourceTableName.namespace}`.`${sourceTableName.name}`
-    ), numbered_rows AS (
-        SELECT *, row_number() OVER (
-            PARTITION BY $pkList ORDER BY $cursorOrderClause `$COLUMN_NAME_AB_EXTRACTED_AT` DESC
-        ) AS row_number
-        FROM records
-    )
-    SELECT $columnList $COLUMN_NAME_AB_META, $COLUMN_NAME_AB_RAW_ID, $COLUMN_NAME_AB_EXTRACTED_AT, $COLUMN_NAME_AB_GENERATION_ID
-    FROM numbered_rows
-    WHERE row_number = 1
-    """
-        .trimIndent()
-        .andLog()
-}
-
 fun countTable(
     tableName: TableName,
     alias: String = "",
@@ -297,21 +144,6 @@ class ClickhouseSqlGenerator(
     .trimIndent()
     .andLog()

-private fun columnsAndTypes(
-    stream: DestinationStream,
-    columnNameMapping: ColumnNameMapping,
-    nonNullableColumns: List<String>,
-): String {
-    return stream.schema
-        .asColumns()
-        .map { (fieldName, type) ->
-            val columnName = columnNameMapping[fieldName]!!
-            val typeName = type.type.toDialectType(clickhouseConfiguration.enableJson)
-            "`$columnName` ${typeDecl(typeName, !nonNullableColumns.contains(columnName))}"
-        }
-        .joinToString(",\n")
-}
-
 fun alterTable(alterationSummary: ColumnChangeset, tableName: TableName): String {
     val builder =
         StringBuilder()
@@ -330,53 +162,36 @@ class ClickhouseSqlGenerator(
     return builder.dropLast(1).toString().andLog()
 }

-    companion object {
-        const val DATETIME_WITH_PRECISION = "DateTime64(3)"
-        const val DECIMAL_WITH_PRECISION_AND_SCALE = "Decimal(38, 9)"
-
-        private val VALID_VERSION_COLUMN_TYPES =
-            setOf(
-                IntegerType::class,
-                DateType::class,
-                TimestampTypeWithTimezone::class,
-                TimestampTypeWithoutTimezone::class,
-            )
-    }
-}
-
-fun String.sqlNullable(): String = "Nullable($this)"
-
-fun AirbyteType.toDialectType(enableJson: Boolean): String =
-    when (this) {
-        BooleanType -> ClickHouseDataType.Bool.name
-        DateType -> ClickHouseDataType.Date32.name
-        IntegerType -> ClickHouseDataType.Int64.name
-        NumberType -> DECIMAL_WITH_PRECISION_AND_SCALE
-        StringType -> ClickHouseDataType.String.name
-        TimeTypeWithTimezone -> ClickHouseDataType.String.name
-        TimeTypeWithoutTimezone -> ClickHouseDataType.String.name
-        TimestampTypeWithTimezone,
-        TimestampTypeWithoutTimezone -> DATETIME_WITH_PRECISION
-        is ArrayType,
-        ArrayTypeWithoutSchema,
-        is UnionType,
-        is UnknownType -> ClickHouseDataType.String.name
-        ObjectTypeWithEmptySchema,
-        ObjectTypeWithoutSchema,
-        is ObjectType -> {
-            if (enableJson) {
-                ClickHouseDataType.JSON.name
-            } else {
-                ClickHouseDataType.String.name
-            }
-        }
-    }
-
-fun typeDecl(type: String, nullable: Boolean) =
-    if (nullable) {
-        type.sqlNullable()
-    } else {
-        type
-    }
-
-fun ColumnType.typeDecl() = typeDecl(this.type, this.nullable)
+    fun ColumnType.typeDecl() =
+        if (nullable) {
+            "Nullable($type)"
+        } else {
+            type
+        }
+
+    /**
+     * TODO: this is really a schema validation function and should probably run on startup long
+     * before we go to create a table.
+     */
+    internal fun flattenPks(
+        primaryKey: List<List<String>>,
+    ): List<String> {
+        return primaryKey.map { fieldPath ->
+            if (fieldPath.size != 1) {
+                throw UnsupportedOperationException(
+                    "Only top-level primary keys are supported, got $fieldPath",
+                )
+            }
+            fieldPath.first()
+        }
+    }
+
+    /**
+     * This extension is here to avoid writing `.also { log.info { it }}` for every returned string
+     * we want to log
+     */
+    private fun String.andLog(): String {
+        log.info { this }
+        return this
+    }
+}
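As a standalone illustration of the rewritten helpers above (nullable wrapping via typeDecl() and top-level primary-key flattening via flattenPks()), a minimal sketch follows; the ColumnType data class here is a simplified stand-in for the CDK type, not the real class:

    // Simplified stand-in for io.airbyte.cdk.load.component.ColumnType (assumption for this sketch).
    data class ColumnType(val type: String, val nullable: Boolean)

    // Nullable columns are wrapped in ClickHouse's Nullable(...) type, mirroring typeDecl() above.
    fun ColumnType.typeDecl() = if (nullable) "Nullable($type)" else type

    // Only single-element (top-level) key paths are accepted, mirroring flattenPks() above.
    fun flattenPks(primaryKey: List<List<String>>): List<String> =
        primaryKey.map { fieldPath ->
            if (fieldPath.size != 1) {
                throw UnsupportedOperationException("Only top-level primary keys are supported, got $fieldPath")
            }
            fieldPath.first()
        }

    fun main() {
        println(ColumnType("Int64", nullable = true).typeDecl())   // Nullable(Int64)
        println(ColumnType("String", nullable = false).typeDecl()) // String
        println(flattenPks(listOf(listOf("id"), listOf("name"))))  // [id, name]
    }
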
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.clickhouse.client
+
+import io.airbyte.cdk.load.table.CDC_CURSOR_COLUMN
+import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes.VALID_VERSION_COLUMN_TYPES
+
+object ClickhouseSqlTypes {
+    const val DATETIME_WITH_PRECISION = "DateTime64(3)"
+    const val DECIMAL_WITH_PRECISION_AND_SCALE = "Decimal(38, 9)"
+    const val BOOL = "Bool"
+    const val DATE32 = "Date32"
+    const val INT64 = "Int64"
+    const val STRING = "String"
+    const val JSON = "JSON"
+
+    val VALID_VERSION_COLUMN_TYPES =
+        setOf(
+            INT64,
+            DATE32,
+            DATETIME_WITH_PRECISION,
+        )
+}
+
+// Warning: if any munging changes the name of the CDC column name this will break.
+// Currently, that is not the case.
+fun isValidVersionColumn(name: String, type: String) =
+    // CDC cursors cannot be used as a version column since they are null
+    // during the initial CDC snapshot.
+    name != CDC_CURSOR_COLUMN && VALID_VERSION_COLUMN_TYPES.contains(type)
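For illustration only, a self-contained sketch of how the new isValidVersionColumn check behaves; the CDC cursor name below is an assumed placeholder, since the real value comes from the CDK's CDC_CURSOR_COLUMN constant imported above:

    // Assumed placeholder for the CDK's CDC_CURSOR_COLUMN constant (illustration only).
    const val CDC_CURSOR_COLUMN = "_ab_cdc_cursor"

    val VALID_VERSION_COLUMN_TYPES = setOf("Int64", "Date32", "DateTime64(3)")

    // A column qualifies as a ReplacingMergeTree version column only if it is not the CDC cursor
    // (which is null during the initial snapshot) and has an orderable type.
    fun isValidVersionColumn(name: String, type: String) =
        name != CDC_CURSOR_COLUMN && VALID_VERSION_COLUMN_TYPES.contains(type)

    fun main() {
        println(isValidVersionColumn("updated_at", "DateTime64(3)")) // true
        println(isValidVersionColumn("updated_at", "String"))        // false
        println(isValidVersionColumn(CDC_CURSOR_COLUMN, "Int64"))    // false
    }
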
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
- */
-
-package io.airbyte.integrations.destination.clickhouse.config
-
-import io.airbyte.cdk.load.command.DestinationStream
-import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
-import io.airbyte.cdk.load.schema.model.TableName
-import io.airbyte.cdk.load.table.ColumnNameGenerator
-import io.airbyte.cdk.load.table.FinalTableNameGenerator
-import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
-import jakarta.inject.Singleton
-import java.util.Locale
-import java.util.UUID
-
-@Singleton
-class ClickhouseFinalTableNameGenerator(private val config: ClickhouseConfiguration) :
-    FinalTableNameGenerator {
-    override fun getTableName(streamDescriptor: DestinationStream.Descriptor) =
-        TableName(
-            namespace =
-                (streamDescriptor.namespace ?: config.resolvedDatabase)
-                    .toClickHouseCompatibleName(),
-            name = streamDescriptor.name.toClickHouseCompatibleName(),
-        )
-}
-
-@Singleton
-class ClickhouseColumnNameGenerator : ColumnNameGenerator {
-    override fun getColumnName(column: String): ColumnNameGenerator.ColumnName {
-        return ColumnNameGenerator.ColumnName(
-            column.toClickHouseCompatibleName(),
-            column.lowercase(Locale.getDefault()).toClickHouseCompatibleName(),
-        )
-    }
-}
-
-/**
- * Transforms a string to be compatible with ClickHouse table and column names.
- *
- * @return The transformed string suitable for ClickHouse identifiers.
- */
-fun String.toClickHouseCompatibleName(): String {
-    // 1. Replace any character that is not a letter,
-    // a digit (0-9), or an underscore (_) with a single underscore.
-    var transformed = toAlphanumericAndUnderscore(this)
-
-    // 2. Ensure the identifier does not start with a digit.
-    // If it starts with a digit, prepend an underscore.
-    if (transformed.isNotEmpty() && transformed[0].isDigit()) {
-        transformed = "_$transformed"
-    }
-
-    // 3. Do not allow empty strings.
-    if (transformed.isEmpty()) {
-        return "default_name_${UUID.randomUUID()}" // A fallback name if the input results in an
-        // empty string
-    }
-
-    return transformed
-}
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.clickhouse.schema
+
+import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
+import java.util.UUID
+
+/**
+ * Transforms a string to be compatible with ClickHouse table and column names.
+ *
+ * @return The transformed string suitable for ClickHouse identifiers.
+ */
+fun String.toClickHouseCompatibleName(): String {
+    // 1. Replace any character that is not a letter,
+    // a digit (0-9), or an underscore (_) with a single underscore.
+    var transformed = toAlphanumericAndUnderscore(this)
+
+    // 2. Do not allow empty strings.
+    if (transformed.isEmpty()) {
+        return "default_name_${UUID.randomUUID()}" // A fallback name if the input results in an
+        // empty string
+    }
+
+    // 3. Ensure the identifier does not start with a digit.
+    // If it starts with a digit, prepend an underscore.
+    if (transformed[0].isDigit()) {
+        transformed = "_$transformed"
+    }
+
+    return transformed
+}
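A rough sketch of the identifier normalization above on a few sample inputs; the regex-based helper below is a simplified stand-in for the CDK's toAlphanumericAndUnderscore, so treat the exact outputs as approximate:

    import java.util.UUID

    // Simplified stand-in for the CDK's toAlphanumericAndUnderscore (assumption for this sketch).
    fun toAlphanumericAndUnderscore(s: String) = s.replace(Regex("[^A-Za-z0-9_]"), "_")

    fun String.toClickHouseCompatibleName(): String {
        var transformed = toAlphanumericAndUnderscore(this)
        if (transformed.isEmpty()) return "default_name_${UUID.randomUUID()}"
        if (transformed[0].isDigit()) transformed = "_$transformed"
        return transformed
    }

    fun main() {
        println("hello world".toClickHouseCompatibleName()) // hello_world
        println("2024-orders".toClickHouseCompatibleName()) // _2024_orders
        println("".toClickHouseCompatibleName())            // default_name_<random-uuid>
    }
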
@@ -4,6 +4,7 @@

 package io.airbyte.integrations.destination.clickhouse.schema

+import io.airbyte.cdk.load.command.Dedupe
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.data.ArrayType
@@ -24,11 +25,11 @@ import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
 import io.airbyte.cdk.load.data.UnionType
 import io.airbyte.cdk.load.data.UnknownType
 import io.airbyte.cdk.load.schema.TableSchemaMapper
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
 import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.TempTableNameGenerator
-import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DATETIME_WITH_PRECISION
-import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlGenerator.Companion.DECIMAL_WITH_PRECISION_AND_SCALE
-import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
+import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes
+import io.airbyte.integrations.destination.clickhouse.client.isValidVersionColumn
 import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
 import jakarta.inject.Singleton

@@ -55,30 +56,66 @@ class ClickhouseTableSchemaMapper(
         // Map Airbyte field types to ClickHouse column types
         val clickhouseType =
             when (fieldType.type) {
-                BooleanType -> "Bool"
-                DateType -> "Date32"
-                IntegerType -> "Int64"
-                NumberType -> DECIMAL_WITH_PRECISION_AND_SCALE
-                StringType -> "String"
-                TimeTypeWithTimezone -> "String"
-                TimeTypeWithoutTimezone -> "String"
+                BooleanType -> ClickhouseSqlTypes.BOOL
+                DateType -> ClickhouseSqlTypes.DATE32
+                IntegerType -> ClickhouseSqlTypes.INT64
+                NumberType -> ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE
+                StringType -> ClickhouseSqlTypes.STRING
+                TimeTypeWithTimezone -> ClickhouseSqlTypes.STRING
+                TimeTypeWithoutTimezone -> ClickhouseSqlTypes.STRING
                 TimestampTypeWithTimezone,
-                TimestampTypeWithoutTimezone -> DATETIME_WITH_PRECISION
+                TimestampTypeWithoutTimezone -> ClickhouseSqlTypes.DATETIME_WITH_PRECISION
                 is ArrayType,
                 ArrayTypeWithoutSchema,
                 is UnionType,
-                is UnknownType -> "String"
+                is UnknownType -> ClickhouseSqlTypes.STRING
                 ObjectTypeWithEmptySchema,
                 ObjectTypeWithoutSchema,
                 is ObjectType -> {
                     if (config.enableJson) {
-                        "JSON"
+                        ClickhouseSqlTypes.JSON
                     } else {
-                        "String"
+                        ClickhouseSqlTypes.STRING
                     }
                 }
             }

         return ColumnType(clickhouseType, fieldType.nullable)
     }

+    override fun toFinalSchema(tableSchema: StreamTableSchema): StreamTableSchema {
+        if (tableSchema.importType !is Dedupe) {
+            return tableSchema
+        }
+
+        // For dedupe mode we do extra logic to ensure certain columns are non-null:
+        // 1) the primary key columns
+        // 2) the version column used by the dedupe engine (in practice the cursor)
+        val pks = tableSchema.getPrimaryKey().flatten()
+        val cursor = tableSchema.getCursor().firstOrNull()
+
+        val nonNullCols = buildSet {
+            addAll(pks) // Primary keys are always non-nullable
+            if (cursor != null) {
+                // Check if the cursor column type is valid for ClickHouse ReplacingMergeTree
+                val cursorColumnType = tableSchema.columnSchema.finalSchema[cursor]!!.type
+                if (isValidVersionColumn(cursor, cursorColumnType)) {
+                    // Cursor column is valid, use it as version column
+                    add(cursor) // Make cursor column non-nullable too
+                }
+            }
+        }
+
+        val finalSchema =
+            tableSchema.columnSchema.finalSchema
+                .map {
+                    it.key to
+                        it.value.copy(nullable = it.value.nullable && !nonNullCols.contains(it.key))
+                }
+                .toMap()
+
+        return tableSchema.copy(
+            columnSchema = tableSchema.columnSchema.copy(finalSchema = finalSchema)
+        )
+    }
 }
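Read in isolation, the nullability rule that toFinalSchema() adds above amounts to the following sketch; the types are simplified stand-ins, since the real code operates on the CDK's StreamTableSchema:

    data class ColumnType(val type: String, val nullable: Boolean)

    // Primary keys, plus a cursor that qualifies as a ReplacingMergeTree version column,
    // are forced to be non-nullable; every other column keeps its original nullability.
    fun applyDedupeNullability(
        finalSchema: Map<String, ColumnType>,
        pks: List<String>,
        cursor: String?,
        isValidVersionColumn: (String, String) -> Boolean,
    ): Map<String, ColumnType> {
        val nonNullCols = buildSet {
            addAll(pks)
            if (cursor != null && isValidVersionColumn(cursor, finalSchema.getValue(cursor).type)) {
                add(cursor)
            }
        }
        return finalSchema.mapValues { (name, col) ->
            col.copy(nullable = col.nullable && name !in nonNullCols)
        }
    }
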
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.clickhouse.component
+
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.NEGATIVE_HIGH_PRECISION_FLOAT
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.POSITIVE_HIGH_PRECISION_FLOAT
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_NEGATIVE_FLOAT32
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_NEGATIVE_FLOAT64
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_POSITIVE_FLOAT32
+import io.airbyte.cdk.load.component.DataCoercionNumberFixtures.SMALLEST_POSITIVE_FLOAT64
+import io.airbyte.cdk.load.component.DataCoercionSuite
+import io.airbyte.cdk.load.component.TableOperationsClient
+import io.airbyte.cdk.load.component.TestTableOperationsClient
+import io.airbyte.cdk.load.component.toArgs
+import io.airbyte.cdk.load.data.AirbyteValue
+import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
+import io.airbyte.cdk.load.schema.TableSchemaFactory
+import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason
+import io.micronaut.test.extensions.junit5.annotation.MicronautTest
+import org.junit.jupiter.params.ParameterizedTest
+import org.junit.jupiter.params.provider.MethodSource
+
+@MicronautTest(environments = ["component"], resolveParameters = false)
+class ClickhouseDataCoercionTest(
+    override val coercer: ValueCoercer,
+    override val opsClient: TableOperationsClient,
+    override val testClient: TestTableOperationsClient,
+    override val schemaFactory: TableSchemaFactory,
+) : DataCoercionSuite {
+    @ParameterizedTest
+    // We use clickhouse's Int64 type for integers
+    @MethodSource("io.airbyte.cdk.load.component.DataCoercionIntegerFixtures#int64")
+    override fun `handle integer values`(
+        inputValue: AirbyteValue,
+        expectedValue: Any?,
+        expectedChangeReason: Reason?
+    ) {
+        super.`handle integer values`(inputValue, expectedValue, expectedChangeReason)
+    }
+
+    @ParameterizedTest
+    @MethodSource(
+        "io.airbyte.integrations.destination.clickhouse.component.ClickhouseDataCoercionTest#numbers"
+    )
+    override fun `handle number values`(
+        inputValue: AirbyteValue,
+        expectedValue: Any?,
+        expectedChangeReason: Reason?
+    ) {
+        super.`handle number values`(inputValue, expectedValue, expectedChangeReason)
+    }
+
+    companion object {
+        /**
+         * destination-clickhouse doesn't set a change reason when truncating high-precision numbers
+         * (https://github.com/airbytehq/airbyte-internal-issues/issues/15401)
+         */
+        @JvmStatic
+        fun numbers() =
+            DataCoercionNumberFixtures.numeric38_9
+                .map {
+                    when (it.name) {
+                        POSITIVE_HIGH_PRECISION_FLOAT,
+                        NEGATIVE_HIGH_PRECISION_FLOAT,
+                        SMALLEST_POSITIVE_FLOAT32,
+                        SMALLEST_NEGATIVE_FLOAT32,
+                        SMALLEST_POSITIVE_FLOAT64,
+                        SMALLEST_NEGATIVE_FLOAT64 -> it.copy(changeReason = null)
+                        else -> it
+                    }
+                }
+                .toArgs()
+    }
+}
@@ -6,6 +6,7 @@ package io.airbyte.integrations.destination.clickhouse.component

 import io.airbyte.cdk.load.component.TableOperationsSuite
 import io.airbyte.cdk.load.component.TestTableOperationsClient
+import io.airbyte.cdk.load.schema.TableSchemaFactory
 import io.airbyte.integrations.destination.clickhouse.client.ClickhouseAirbyteClient
 import io.micronaut.test.extensions.junit5.annotation.MicronautTest
 import jakarta.inject.Inject
@@ -15,6 +16,7 @@ import org.junit.jupiter.api.Test
 class ClickhouseTableOperationsTest : TableOperationsSuite {
     @Inject override lateinit var client: ClickhouseAirbyteClient
     @Inject override lateinit var testClient: TestTableOperationsClient
+    @Inject override lateinit var schemaFactory: TableSchemaFactory

     @Test
     override fun `connect to database`() {
@@ -12,6 +12,8 @@ import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
 import io.airbyte.cdk.load.component.TableSchemaEvolutionFixtures
 import io.airbyte.cdk.load.component.TableSchemaEvolutionSuite
 import io.airbyte.cdk.load.component.TestTableOperationsClient
+import io.airbyte.cdk.load.schema.TableSchemaFactory
+import io.airbyte.integrations.destination.clickhouse.client.ClickhouseSqlTypes
 import io.micronaut.test.extensions.junit5.annotation.MicronautTest
 import org.junit.jupiter.api.Test

@@ -19,24 +21,25 @@ import org.junit.jupiter.api.Test
 class ClickhouseTableSchemaEvolutionTest(
     override val client: TableSchemaEvolutionClient,
     override val opsClient: TableOperationsClient,
-    override val testClient: TestTableOperationsClient
+    override val testClient: TestTableOperationsClient,
+    override val schemaFactory: TableSchemaFactory,
 ) : TableSchemaEvolutionSuite {
     private val allTypesTableSchema =
         TableSchema(
             mapOf(
-                "string" to ColumnType("String", true),
-                "boolean" to ColumnType("Bool", true),
-                "integer" to ColumnType("Int64", true),
-                "number" to ColumnType("Decimal(38, 9)", true),
-                "date" to ColumnType("Date32", true),
-                "timestamp_tz" to ColumnType("DateTime64(3)", true),
-                "timestamp_ntz" to ColumnType("DateTime64(3)", true),
-                "time_tz" to ColumnType("String", true),
-                "time_ntz" to ColumnType("String", true),
+                "string" to ColumnType(ClickhouseSqlTypes.STRING, true),
+                "boolean" to ColumnType(ClickhouseSqlTypes.BOOL, true),
+                "integer" to ColumnType(ClickhouseSqlTypes.INT64, true),
+                "number" to ColumnType(ClickhouseSqlTypes.DECIMAL_WITH_PRECISION_AND_SCALE, true),
+                "date" to ColumnType(ClickhouseSqlTypes.DATE32, true),
+                "timestamp_tz" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, true),
+                "timestamp_ntz" to ColumnType(ClickhouseSqlTypes.DATETIME_WITH_PRECISION, true),
+                "time_tz" to ColumnType(ClickhouseSqlTypes.STRING, true),
+                "time_ntz" to ColumnType(ClickhouseSqlTypes.STRING, true),
                 // yes, these three are different
-                "array" to ColumnType("String", true),
-                "object" to ColumnType("JSON", true),
-                "unknown" to ColumnType("String", true),
+                "array" to ColumnType(ClickhouseSqlTypes.STRING, true),
+                "object" to ColumnType(ClickhouseSqlTypes.JSON, true),
+                "unknown" to ColumnType(ClickhouseSqlTypes.STRING, true),
             )
         )

@@ -16,7 +16,7 @@ import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
 import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
 import io.airbyte.cdk.load.test.util.ExpectedRecordMapper
 import io.airbyte.cdk.load.test.util.OutputRecord
-import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
+import io.airbyte.integrations.destination.clickhouse.schema.toClickHouseCompatibleName
 import java.math.RoundingMode
 import java.time.LocalTime
 import java.time.ZoneOffset
@@ -30,8 +30,8 @@ import io.airbyte.cdk.load.write.UnknownTypesBehavior
 import io.airbyte.integrations.destination.clickhouse.ClickhouseConfigUpdater
 import io.airbyte.integrations.destination.clickhouse.ClickhouseContainerHelper
 import io.airbyte.integrations.destination.clickhouse.Utils
-import io.airbyte.integrations.destination.clickhouse.config.toClickHouseCompatibleName
 import io.airbyte.integrations.destination.clickhouse.fixtures.ClickhouseExpectedRecordMapper
+import io.airbyte.integrations.destination.clickhouse.schema.toClickHouseCompatibleName
 import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
 import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfigurationFactory
 import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseSpecificationOss
@@ -17,11 +17,10 @@ import io.airbyte.cdk.load.component.TableSchema
 import io.airbyte.cdk.load.data.FieldType
 import io.airbyte.cdk.load.data.StringType
 import io.airbyte.cdk.load.message.Meta
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
 import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
 import io.airbyte.cdk.load.table.TempTableNameGenerator
-import io.airbyte.integrations.destination.clickhouse.config.ClickhouseFinalTableNameGenerator
-import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
 import io.mockk.coEvery
 import io.mockk.coVerify
 import io.mockk.coVerifyOrder
@@ -39,10 +38,7 @@ class ClickhouseAirbyteClientTest {
     // Mocks
     private val client: ClickHouseClientRaw = mockk(relaxed = true)
     private val clickhouseSqlGenerator: ClickhouseSqlGenerator = mockk(relaxed = true)
-    private val clickhouseFinalTableNameGenerator: ClickhouseFinalTableNameGenerator =
-        mockk(relaxed = true)
     private val tempTableNameGenerator: TempTableNameGenerator = mockk(relaxed = true)
-    private val clickhouseConfiguration: ClickhouseConfiguration = mockk(relaxed = true)

     // Client
     private val clickhouseAirbyteClient =
@@ -51,7 +47,6 @@ class ClickhouseAirbyteClientTest {
                 client,
                 clickhouseSqlGenerator,
                 tempTableNameGenerator,
-                clickhouseConfiguration
             )
         )

@@ -107,7 +102,6 @@ class ClickhouseAirbyteClientTest {
             alterTableStatement
         coEvery { clickhouseAirbyteClient.execute(alterTableStatement) } returns
             mockk(relaxed = true)
-        every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns mockTableName

         mockCHSchemaWithAirbyteColumns()

@@ -125,6 +119,16 @@ class ClickhouseAirbyteClientTest {
                 every { asColumns() } returns LinkedHashMap.newLinkedHashMap(0)
                 }
                 every { importType } returns Append
+                every { tableSchema } returns
+                    mockk(relaxed = true) {
+                        every { columnSchema } returns
+                            mockk(relaxed = true) {
+                                every { inputSchema } returns LinkedHashMap.newLinkedHashMap(0)
+                                every { inputToFinalColumnNames } returns emptyMap()
+                            }
+                        every { getPrimaryKey() } returns emptyList()
+                        every { getCursor() } returns emptyList()
+                    }
             }
         clickhouseAirbyteClient.applyChangeset(
             stream,
@@ -164,11 +168,20 @@ class ClickhouseAirbyteClientTest {

         coEvery { clickhouseAirbyteClient.execute(any()) } returns mockk(relaxed = true)
         every { tempTableNameGenerator.generate(any()) } returns tempTableName
-        every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns finalTableName

         mockCHSchemaWithAirbyteColumns()

         val columnMapping = ColumnNameMapping(mapOf())
+        val tableSchema1: StreamTableSchema =
+            mockk(relaxed = true) {
+                every { columnSchema } returns
+                    mockk(relaxed = true) {
+                        every { inputSchema } returns LinkedHashMap.newLinkedHashMap(0)
+                        every { inputToFinalColumnNames } returns emptyMap()
+                    }
+                every { getPrimaryKey() } returns emptyList()
+                every { getCursor() } returns emptyList()
+            }
         val stream =
             mockk<DestinationStream> {
                 every { mappedDescriptor } returns
@@ -182,6 +195,7 @@ class ClickhouseAirbyteClientTest {
                     every { asColumns() } returns LinkedHashMap.newLinkedHashMap(0)
                 }
                 every { importType } returns Append
+                every { tableSchema } returns tableSchema1
             }
         clickhouseAirbyteClient.applyChangeset(
             stream,
@@ -195,8 +209,8 @@ class ClickhouseAirbyteClientTest {

         coVerifyOrder {
             clickhouseSqlGenerator.createNamespace(tempTableName.namespace)
-            clickhouseSqlGenerator.createTable(stream, tempTableName, columnMapping, true)
-            clickhouseSqlGenerator.copyTable(columnMapping, finalTableName, tempTableName)
+            clickhouseSqlGenerator.createTable(tempTableName, tableSchema1, true)
+            clickhouseSqlGenerator.copyTable(setOf("something"), finalTableName, tempTableName)
             clickhouseSqlGenerator.exchangeTable(tempTableName, finalTableName)
             clickhouseSqlGenerator.dropTable(tempTableName)
         }
@@ -207,8 +221,6 @@ class ClickhouseAirbyteClientTest {
     fun `test ensure schema matches fails if no airbyte columns`() = runTest {
         val finalTableName = TableName("fin", "al")

-        every { clickhouseFinalTableNameGenerator.getTableName(any()) } returns finalTableName
-
         val columnMapping = ColumnNameMapping(mapOf())
         val stream =
             mockk<DestinationStream> {
@@ -266,6 +278,19 @@ class ClickhouseAirbyteClientTest {
                     every { asColumns() } returns columns
                 }
                 every { importType } returns Append
+                every { tableSchema } returns
+                    mockk(relaxed = true) {
+                        every { columnSchema } returns
+                            mockk(relaxed = true) {
+                                every { inputSchema } returns columns
+                                every { inputToFinalColumnNames } returns
+                                    mapOf("field 1" to "field_1")
+                                every { finalSchema } returns
+                                    mapOf("field_1" to ColumnType("String", true))
+                            }
+                        every { getPrimaryKey() } returns emptyList()
+                        every { getCursor() } returns emptyList()
+                    }
             }

         val columnMapping = ColumnNameMapping(mapOf("field 1" to "field_1"))
@@ -280,35 +305,6 @@ class ClickhouseAirbyteClientTest {
         Assertions.assertEquals(expected, actual)
     }

-    @Test
-    fun `test copyIntersectionColumn`() = runTest {
-        val columnsToCopy =
-            setOf(
-                "column1",
-                "column2",
-            )
-        val columnNameMapping = ColumnNameMapping(mapOf("2" to "column2", "3" to "column3"))
-        val properTableName = TableName("table", "name")
-        val tempTableName = TableName("table", "tmp")
-
-        coEvery { clickhouseAirbyteClient.execute(any()) } returns mockk()
-
-        clickhouseAirbyteClient.copyIntersectionColumn(
-            columnsToCopy,
-            columnNameMapping,
-            properTableName,
-            tempTableName,
-        )
-
-        verify {
-            clickhouseSqlGenerator.copyTable(
-                ColumnNameMapping(mapOf("2" to "column2")),
-                properTableName,
-                tempTableName,
-            )
-        }
-    }
-
     companion object {
         // Constants
         private const val DUMMY_SENTENCE = "SELECT 1"
@@ -10,9 +10,6 @@ import io.airbyte.cdk.load.component.ColumnChangeset
 import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.component.ColumnTypeChange
 import io.airbyte.cdk.load.schema.model.TableName
-import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.integrations.destination.clickhouse.spec.ClickhouseConfiguration
-import io.mockk.mockk
 import kotlin.test.assertTrue
 import org.junit.jupiter.api.Assertions
 import org.junit.jupiter.api.Test
@@ -23,9 +20,7 @@ import org.junit.jupiter.params.provider.Arguments
 import org.junit.jupiter.params.provider.MethodSource

 class ClickhouseSqlGeneratorTest {
-    private val clickhouseConfiguration: ClickhouseConfiguration = mockk(relaxed = true)
-
-    private val clickhouseSqlGenerator = ClickhouseSqlGenerator(clickhouseConfiguration)
+    private val clickhouseSqlGenerator = ClickhouseSqlGenerator()

     @Test
     fun testCreateNamespace() {
@@ -90,52 +85,35 @@ class ClickhouseSqlGeneratorTest {
         }
     }

-    @Test
-    fun `test extractPks with single primary key`() {
-        val primaryKey = listOf(listOf("id"))
-        val columnNameMapping = ColumnNameMapping(mapOf("id" to "id_column"))
-        val expected = listOf("id_column")
-        val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
-        Assertions.assertEquals(expected, actual)
-    }
-
     @Test
     fun `test extractPks with multiple primary keys`() {
         val primaryKey = listOf(listOf("id"), listOf("name"))
-        val columnNameMapping =
-            ColumnNameMapping(mapOf("id" to "id_column", "name" to "name_column"))
-        val expected = listOf("id_column", "name_column")
-        val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
+        val expected = listOf("id", "name")
+        val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
         Assertions.assertEquals(expected, actual)
     }

     @Test
-    fun `test extractPks with empty primary key list`() {
+    fun `test flattenPks with empty primary key list`() {
         val primaryKey = emptyList<List<String>>()
-        val columnNameMapping = ColumnNameMapping(emptyMap<String, String>())
         val expected = listOf<String>()
-        val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
+        val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
         Assertions.assertEquals(expected, actual)
     }

     @Test
-    fun `test extractPks without column mapping`() {
+    fun `test extractPks with single primary key`() {
         val primaryKey = listOf(listOf("id"))
-        val columnNameMapping = ColumnNameMapping(mapOf())
         val expected = listOf("id")
-        val actual = clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
+        val actual = clickhouseSqlGenerator.flattenPks(primaryKey)
         Assertions.assertEquals(expected, actual)
     }

     @Test
-    fun `test extractPks with nested primary key`() {
+    fun `test flattenPks with nested primary key`() {
         val primaryKey = listOf(listOf("user", "id"))
-        val columnNameMapping =
-            ColumnNameMapping(
-                mapOf("user.id" to "user_id_column")
-            ) // This mapping is not used but here for completeness.
         assertThrows<UnsupportedOperationException> {
-            clickhouseSqlGenerator.extractPks(primaryKey, columnNameMapping)
+            clickhouseSqlGenerator.flattenPks(primaryKey)
         }
     }

@@ -157,8 +135,7 @@ class ClickhouseSqlGeneratorTest {
     fun `test copyTable`() {
         val sourceTable = TableName("source_namespace", "source_table")
         val targetTable = TableName("target_namespace", "target_table")
-        val columnNameMapping =
-            ColumnNameMapping(mapOf("source_col1" to "target_col1", "source_col2" to "target_col2"))
+        val columnNames = setOf("target_col1", "target_col2")

         val expectedSql =
             """
@@ -179,8 +156,7 @@ class ClickhouseSqlGeneratorTest {
             FROM `source_namespace`.`source_table`
             """.trimIndent()

-        val actualSql =
-            clickhouseSqlGenerator.copyTable(columnNameMapping, sourceTable, targetTable)
+        val actualSql = clickhouseSqlGenerator.copyTable(columnNames, sourceTable, targetTable)
         Assertions.assertEquals(expectedSql, actualSql)
     }

@@ -2,13 +2,13 @@
  * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
  */

-package io.airbyte.integrations.destination.clickhouse.config
+package io.airbyte.integrations.destination.clickhouse.schema

 import java.util.UUID
 import org.junit.jupiter.api.Assertions
 import org.junit.jupiter.api.Test

-class ClickhouseNameGeneratorTest {
+class ClickhouseNamingUtilsTest {
     @Test
     fun `toClickHouseCompatibleName replaces special characters with underscores`() {
         Assertions.assertEquals("hello_world", "hello world".toClickHouseCompatibleName())
@@ -1,4 +1,4 @@
-cdkVersion=0.1.83
+cdkVersion=0.1.86
 # our testcontainer has issues with too much concurrency.
 # 4 threads seems to be the sweet spot.
 testExecutionConcurrency=4
@@ -6,7 +6,7 @@ data:
   connectorSubtype: database
   connectorType: destination
   definitionId: 25c5221d-dce2-4163-ade9-739ef790f503
-  dockerImageTag: 3.0.4
+  dockerImageTag: 3.0.5
   dockerRepository: airbyte/destination-postgres
   documentationUrl: https://docs.airbyte.com/integrations/destinations/postgres
   githubIssueLabel: destination-postgres
@@ -14,8 +14,11 @@ import io.airbyte.cdk.load.data.FieldType
 import io.airbyte.cdk.load.data.ObjectType
 import io.airbyte.cdk.load.data.StringType
 import io.airbyte.cdk.load.message.Meta
+import io.airbyte.cdk.load.schema.model.ColumnSchema
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
+import io.airbyte.cdk.load.schema.model.TableName
+import io.airbyte.cdk.load.schema.model.TableNames
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.airbyte.integrations.destination.postgres.write.load.PostgresInsertBuffer
@@ -51,19 +54,35 @@ class PostgresOssChecker(
             "_airbyte_connection_test_${
                 UUID.randomUUID().toString().replace("-".toRegex(), "")}"
         val qualifiedTableName = TableName(namespace = outputSchema, name = tableName)
+        val tempTableName = TableName(namespace = outputSchema, name = "${tableName}_tmp")
+        val checkSchema =
+            ObjectType(linkedMapOf(CHECK_COLUMN_NAME to FieldType(StringType, nullable = false)))
         val destinationStream =
             DestinationStream(
                 unmappedNamespace = outputSchema,
                 unmappedName = tableName,
                 importType = Append,
-                schema =
-                    ObjectType(
-                        linkedMapOf(CHECK_COLUMN_NAME to FieldType(StringType, nullable = false))
-                    ),
+                schema = checkSchema,
                 generationId = 0L,
                 minimumGenerationId = 0L,
                 syncId = 0L,
-                namespaceMapper = NamespaceMapper()
+                namespaceMapper = NamespaceMapper(),
+                tableSchema =
+                    StreamTableSchema(
+                        tableNames =
+                            TableNames(
+                                finalTableName = qualifiedTableName,
+                                tempTableName = tempTableName,
+                            ),
+                        columnSchema =
+                            ColumnSchema(
+                                inputSchema = checkSchema.properties,
+                                inputToFinalColumnNames =
+                                    mapOf(CHECK_COLUMN_NAME to CHECK_COLUMN_NAME),
+                                finalSchema = emptyMap(),
+                            ),
+                        importType = Append,
+                    ),
             )
         runBlocking {
             try {
@@ -4,15 +4,19 @@

 package io.airbyte.integrations.destination.postgres.client

+import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
+import io.airbyte.cdk.ConfigErrorException
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.component.ColumnChangeset
+import io.airbyte.cdk.load.component.ColumnType
 import io.airbyte.cdk.load.component.TableColumns
 import io.airbyte.cdk.load.component.TableOperationsClient
 import io.airbyte.cdk.load.component.TableSchema
 import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
+import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAMES
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.airbyte.integrations.destination.postgres.sql.COUNT_TOTAL_ALIAS
 import io.airbyte.integrations.destination.postgres.sql.Column
@@ -26,6 +30,11 @@ import javax.sql.DataSource
 private val log = KotlinLogging.logger {}

 @Singleton
+@SuppressFBWarnings(
+    value = ["SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE"],
+    justification =
+        "There is little chance of SQL injection. There is also little need for statement reuse. The basic statement is more readable than the prepared statement."
+)
 class PostgresAirbyteClient(
     private val dataSource: DataSource,
     private val sqlGenerator: PostgresDirectLoadSqlGenerator,
@@ -53,6 +62,29 @@ class PostgresAirbyteClient(
             null
         }

+    override suspend fun namespaceExists(namespace: String): Boolean {
+        return executeQuery(
+            """
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.schemata
+                WHERE schema_name = '$namespace'
+            )
+            """
+        ) { rs -> rs.next() && rs.getBoolean(1) }
+    }
+
+    override suspend fun tableExists(table: TableName): Boolean {
+        return executeQuery(
+            """
+            SELECT EXISTS(
+                SELECT 1 FROM information_schema.tables
+                WHERE table_schema = '${table.namespace}'
+                AND table_name = '${table.name}'
+            )
+            """
+        ) { rs -> rs.next() && rs.getBoolean(1) }
+    }
+
     override suspend fun createNamespace(namespace: String) {
         try {
             execute(sqlGenerator.createNamespace(namespace))
@@ -171,14 +203,26 @@ class PostgresAirbyteClient(
     }

     override suspend fun discoverSchema(tableName: TableName): TableSchema {
-        TODO("Not yet implemented")
+        val columnsInDb = getColumnsFromDbForDiscovery(tableName)
+        val hasAllAirbyteColumns = columnsInDb.keys.containsAll(COLUMN_NAMES)
+
+        if (!hasAllAirbyteColumns) {
+            val message =
+                "The target table ($tableName) already exists in the destination, but does not contain Airbyte's internal columns. Airbyte can only sync to Airbyte-controlled tables. To fix this error, you must either delete the target table or add a prefix in the connection configuration in order to sync to a separate table in the destination."
+            log.error { message }
+            throw ConfigErrorException(message)
+        }
+
+        // Filter out Airbyte columns
+        val userColumns = columnsInDb.filterKeys { it !in COLUMN_NAMES }
+        return TableSchema(userColumns)
     }

     override fun computeSchema(
         stream: DestinationStream,
         columnNameMapping: ColumnNameMapping
     ): TableSchema {
-        TODO("Not yet implemented")
+        return TableSchema(stream.tableSchema.columnSchema.finalSchema)
     }

     override suspend fun applyChangeset(
@@ -188,9 +232,73 @@ class PostgresAirbyteClient(
         expectedColumns: TableColumns,
         columnChangeset: ColumnChangeset
     ) {
-        TODO("Not yet implemented")
+        if (
+            columnChangeset.columnsToAdd.isNotEmpty() ||
+                columnChangeset.columnsToDrop.isNotEmpty() ||
+                columnChangeset.columnsToChange.isNotEmpty()
+        ) {
+            log.info { "Summary of the table alterations:" }
+            log.info { "Added columns: ${columnChangeset.columnsToAdd}" }
+            log.info { "Deleted columns: ${columnChangeset.columnsToDrop}" }
+            log.info { "Modified columns: ${columnChangeset.columnsToChange}" }
+
+            // Convert from TableColumns format to Column format
+            val columnsToAdd =
+                columnChangeset.columnsToAdd
+                    .map { (name, type) -> Column(name, type.type, type.nullable) }
+                    .toSet()
+            val columnsToRemove =
+                columnChangeset.columnsToDrop
+                    .map { (name, type) -> Column(name, type.type, type.nullable) }
+                    .toSet()
+            val columnsToModify =
+                columnChangeset.columnsToChange
+                    .map { (name, change) ->
+                        Column(name, change.newType.type, change.newType.nullable)
+                    }
+                    .toSet()
+            val columnsInDb =
+                (columnChangeset.columnsToRetain +
+                        columnChangeset.columnsToDrop +
+                        columnChangeset.columnsToChange.mapValues { it.value.originalType })
+                    .map { (name, type) -> Column(name, type.type, type.nullable) }
+                    .toSet()
+
+            execute(
+                sqlGenerator.matchSchemas(
+                    tableName = tableName,
+                    columnsToAdd = columnsToAdd,
+                    columnsToRemove = columnsToRemove,
+                    columnsToModify = columnsToModify,
+                    columnsInDb = columnsInDb,
+                    recreatePrimaryKeyIndex = false,
+                    primaryKeyColumnNames = emptyList(),
+                    recreateCursorIndex = false,
+                    cursorColumnName = null,
+                )
+            )
+        }
     }

+    /**
+     * Gets columns from the database including their types for schema discovery. Unlike
+     * [getColumnsFromDb], this returns all columns including Airbyte metadata columns.
+     */
+    private fun getColumnsFromDbForDiscovery(tableName: TableName): Map<String, ColumnType> =
+        executeQuery(sqlGenerator.getTableSchema(tableName)) { rs ->
+            val columnsInDb: MutableMap<String, ColumnType> = mutableMapOf()
+            while (rs.next()) {
+                val columnName = rs.getString(COLUMN_NAME_COLUMN)
+                val dataType = rs.getString("data_type")
+                // PostgreSQL's information_schema always returns 'YES' or 'NO' for is_nullable
+                val isNullable = rs.getString("is_nullable") == "YES"

+                columnsInDb[columnName] = ColumnType(normalizePostgresType(dataType), isNullable)
+            }

+            columnsInDb
+        }

     /**
      * Checks if the primary key index matches the current stream configuration. If the primary keys
      * have changed (detected by comparing columns in the index), then this will return true,
@@ -13,8 +13,8 @@ import io.airbyte.cdk.integrations.util.PostgresSslConnectionUtils
 import io.airbyte.cdk.load.check.CheckOperationV2
 import io.airbyte.cdk.load.check.DestinationCheckerV2
 import io.airbyte.cdk.load.dataflow.config.AggregatePublishingConfig
-import io.airbyte.cdk.load.orchestration.db.DefaultTempTableNameGenerator
-import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
+import io.airbyte.cdk.load.table.DefaultTempTableNameGenerator
+import io.airbyte.cdk.load.table.TempTableNameGenerator
 import io.airbyte.cdk.output.OutputConsumer
 import io.airbyte.cdk.ssh.SshConnectionOptions
 import io.airbyte.cdk.ssh.SshKeyAuthTunnelMethod
@@ -4,17 +4,17 @@
 
 package io.airbyte.integrations.destination.postgres.config
 
+import io.airbyte.cdk.load.command.DestinationCatalog
 import io.airbyte.cdk.load.component.TableOperationsClient
-import io.airbyte.cdk.load.orchestration.db.BaseDirectLoadInitialStatusGatherer
-import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
+import io.airbyte.cdk.load.table.BaseDirectLoadInitialStatusGatherer
 import jakarta.inject.Singleton
 
 @Singleton
 class PostgresDirectLoadDatabaseInitialStatusGatherer(
     airbyteClient: TableOperationsClient,
-    tempTableNameGenerator: TempTableNameGenerator,
+    catalog: DestinationCatalog,
 ) :
     BaseDirectLoadInitialStatusGatherer(
         airbyteClient,
-        tempTableNameGenerator,
+        catalog,
     )
@@ -7,7 +7,7 @@ package io.airbyte.integrations.destination.postgres.dataflow
 import io.airbyte.cdk.load.dataflow.aggregate.Aggregate
 import io.airbyte.cdk.load.dataflow.aggregate.AggregateFactory
 import io.airbyte.cdk.load.dataflow.aggregate.StoreKey
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
+import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
 import io.airbyte.cdk.load.write.StreamStateStore
 import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
@@ -6,10 +6,10 @@ package io.airbyte.integrations.destination.postgres.db
 
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.data.Transformations.Companion.toAlphanumericAndUnderscore
-import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
-import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
-import io.airbyte.cdk.load.table.TableName
+import io.airbyte.cdk.load.schema.model.TableName
+import io.airbyte.cdk.load.table.ColumnNameGenerator
+import io.airbyte.cdk.load.table.FinalTableNameGenerator
+import io.airbyte.cdk.load.table.TypingDedupingUtil
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import jakarta.inject.Singleton
 import java.util.Locale
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
+ */
+
+package io.airbyte.integrations.destination.postgres.schema
+
+import io.airbyte.cdk.load.command.DestinationStream
+import io.airbyte.cdk.load.component.ColumnType
+import io.airbyte.cdk.load.data.ArrayType
+import io.airbyte.cdk.load.data.ArrayTypeWithoutSchema
+import io.airbyte.cdk.load.data.BooleanType
+import io.airbyte.cdk.load.data.DateType
+import io.airbyte.cdk.load.data.FieldType
+import io.airbyte.cdk.load.data.IntegerType
+import io.airbyte.cdk.load.data.NumberType
+import io.airbyte.cdk.load.data.ObjectType
+import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
+import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
+import io.airbyte.cdk.load.data.StringType
+import io.airbyte.cdk.load.data.TimeTypeWithTimezone
+import io.airbyte.cdk.load.data.TimeTypeWithoutTimezone
+import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
+import io.airbyte.cdk.load.data.TimestampTypeWithoutTimezone
+import io.airbyte.cdk.load.data.UnionType
+import io.airbyte.cdk.load.data.UnknownType
+import io.airbyte.cdk.load.schema.TableSchemaMapper
+import io.airbyte.cdk.load.schema.model.TableName
+import io.airbyte.cdk.load.table.TempTableNameGenerator
+import io.airbyte.cdk.load.table.TypingDedupingUtil
+import io.airbyte.integrations.destination.postgres.db.toPostgresCompatibleName
+import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
+import io.airbyte.integrations.destination.postgres.sql.PostgresDataType
+import jakarta.inject.Singleton
+
+@Singleton
+class PostgresTableSchemaMapper(
+    private val config: PostgresConfiguration,
+    private val tempTableNameGenerator: TempTableNameGenerator,
+) : TableSchemaMapper {
+    override fun toFinalTableName(desc: DestinationStream.Descriptor): TableName {
+        val namespace = desc.namespace ?: config.schema
+        return if (!config.legacyRawTablesOnly) {
+            TableName(
+                namespace = namespace.toPostgresCompatibleName(),
+                name = desc.name.toPostgresCompatibleName(),
+            )
+        } else {
+            TableName(
+                namespace = config.internalTableSchema!!.lowercase().toPostgresCompatibleName(),
+                name =
+                    TypingDedupingUtil.concatenateRawTableName(
+                            namespace = namespace,
+                            name = desc.name,
+                        )
+                        .lowercase()
+                        .toPostgresCompatibleName(),
+            )
+        }
+    }
+
+    override fun toTempTableName(tableName: TableName): TableName {
+        return tempTableNameGenerator.generate(tableName)
+    }
+
+    override fun toColumnName(name: String): String {
+        return if (config.legacyRawTablesOnly) {
+            name
+        } else {
+            name.toPostgresCompatibleName()
+        }
+    }
+
+    override fun toColumnType(fieldType: FieldType): ColumnType {
+        val postgresType =
+            when (fieldType.type) {
+                BooleanType -> PostgresDataType.BOOLEAN.typeName
+                DateType -> PostgresDataType.DATE.typeName
+                IntegerType -> PostgresDataType.BIGINT.typeName
+                NumberType -> PostgresDataType.DECIMAL.typeName
+                StringType -> PostgresDataType.VARCHAR.typeName
+                TimeTypeWithTimezone -> PostgresDataType.TIME_WITH_TIMEZONE.typeName
+                TimeTypeWithoutTimezone -> PostgresDataType.TIME.typeName
+                TimestampTypeWithTimezone -> PostgresDataType.TIMESTAMP_WITH_TIMEZONE.typeName
+                TimestampTypeWithoutTimezone -> PostgresDataType.TIMESTAMP.typeName
+                is ArrayType,
+                ArrayTypeWithoutSchema,
+                is ObjectType,
+                ObjectTypeWithEmptySchema,
+                ObjectTypeWithoutSchema,
+                is UnknownType,
+                is UnionType -> PostgresDataType.JSONB.typeName
+            }
+
+        return ColumnType(postgresType, fieldType.nullable)
+    }
+}
@@ -31,8 +31,8 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_LOADED_AT
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_DATA
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import jakarta.inject.Singleton
 import kotlin.collections.plus
@@ -9,9 +9,9 @@ import io.airbyte.cdk.load.command.Dedupe
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.spec.CdcDeletionMode
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import jakarta.inject.Singleton
@@ -531,7 +531,7 @@ class PostgresDirectLoadSqlGenerator(
 
     fun getTableSchema(tableName: TableName): String =
         """
-        SELECT column_name, data_type
+        SELECT column_name, data_type, is_nullable
         FROM information_schema.columns
         WHERE table_schema = '${tableName.namespace}'
        AND table_name = '${tableName.name}';
@@ -6,16 +6,17 @@ package io.airbyte.integrations.destination.postgres.write
 
 import io.airbyte.cdk.SystemErrorException
 import io.airbyte.cdk.load.command.Dedupe
+import io.airbyte.cdk.load.command.DestinationCatalog
 import io.airbyte.cdk.load.command.DestinationStream
-import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
-import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendTruncateStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupTruncateStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
+import io.airbyte.cdk.load.table.ColumnNameMapping
+import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
+import io.airbyte.cdk.load.table.TempTableNameGenerator
+import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
+import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendTruncateStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupTruncateStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
 import io.airbyte.cdk.load.write.DestinationWriter
 import io.airbyte.cdk.load.write.StreamLoader
 import io.airbyte.cdk.load.write.StreamStateStore
@@ -28,7 +29,7 @@ private val log = KotlinLogging.logger {}
 
 @Singleton
 class PostgresWriter(
-    private val names: TableCatalog,
+    private val catalog: DestinationCatalog,
     private val stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>,
     private val streamStateStore: StreamStateStore<DirectLoadTableExecutionConfig>,
     private val postgresClient: PostgresAirbyteClient,
@@ -38,19 +39,20 @@ class PostgresWriter(
     private lateinit var initialStatuses: Map<DestinationStream, DirectLoadInitialStatus>
 
     override suspend fun setup() {
-        names.values
-            .map { (tableNames, _) -> tableNames.finalTableName!!.namespace }
+        catalog.streams
+            .map { it.tableSchema.tableNames.finalTableName!!.namespace }
             .forEach { postgresClient.createNamespace(it) }
 
-        initialStatuses = stateGatherer.gatherInitialStatus(names)
+        initialStatuses = stateGatherer.gatherInitialStatus()
     }
 
     override fun createStreamLoader(stream: DestinationStream): StreamLoader {
         val initialStatus = initialStatuses[stream]!!
-        val tableNameInfo = names[stream]!!
-        val realTableName = tableNameInfo.tableNames.finalTableName!!
+        val realTableName = stream.tableSchema.tableNames.finalTableName!!
         val tempTableName = tempTableNameGenerator.generate(realTableName)
-        val columnNameMapping = tableNameInfo.columnNameMapping
+        val columnNameMapping =
+            ColumnNameMapping(stream.tableSchema.columnSchema.inputToFinalColumnNames)
 
         val isRawTablesMode = postgresConfiguration.legacyRawTablesOnly == true
         if (isRawTablesMode && stream.importType is Dedupe) {
@@ -6,7 +6,7 @@ package io.airbyte.integrations.destination.postgres.write.load
 
 import com.google.common.annotations.VisibleForTesting
 import io.airbyte.cdk.load.data.AirbyteValue
-import io.airbyte.cdk.load.table.TableName
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.github.oshai.kotlinlogging.KotlinLogging
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
- */
-
-package io.airbyte.integrations.destination.postgres.write.transform
-
-import io.airbyte.cdk.load.command.DestinationStream
-import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
-import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
-import jakarta.inject.Singleton
-
-@Singleton
-class PostgresColumnNameMapper(
-    private val catalogInfo: TableCatalog,
-    private val postgresConfiguration: PostgresConfiguration,
-) : ColumnNameMapper {
-    override fun getMappedColumnName(stream: DestinationStream, columnName: String): String {
-        if (postgresConfiguration.legacyRawTablesOnly == true) {
-            return columnName
-        } else {
-            return catalogInfo.getMappedColumnName(stream, columnName)!!
-        }
-    }
-}
@@ -0,0 +1,45 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.integrations.destination.postgres.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.util.Jsons
|
||||||
|
import io.airbyte.integrations.destination.postgres.PostgresConfigUpdater
|
||||||
|
import io.airbyte.integrations.destination.postgres.PostgresContainerHelper
|
||||||
|
import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
|
||||||
|
import io.airbyte.integrations.destination.postgres.spec.PostgresConfigurationFactory
|
||||||
|
import io.airbyte.integrations.destination.postgres.spec.PostgresSpecificationOss
|
||||||
|
import io.micronaut.context.annotation.Factory
|
||||||
|
import io.micronaut.context.annotation.Primary
|
||||||
|
import io.micronaut.context.annotation.Requires
|
||||||
|
import jakarta.inject.Singleton
|
||||||
|
|
||||||
|
@Requires(env = ["component"])
|
||||||
|
@Factory
|
||||||
|
class PostgresComponentTestConfigFactory {
|
||||||
|
@Singleton
|
||||||
|
@Primary
|
||||||
|
fun config(): PostgresConfiguration {
|
||||||
|
// Start the postgres container
|
||||||
|
PostgresContainerHelper.start()
|
||||||
|
|
||||||
|
// Create a minimal config JSON and update it with container details
|
||||||
|
val configJson =
|
||||||
|
"""
|
||||||
|
{
|
||||||
|
"host": "replace_me_host",
|
||||||
|
"port": "replace_me_port",
|
||||||
|
"database": "replace_me_database",
|
||||||
|
"schema": "public",
|
||||||
|
"username": "replace_me_username",
|
||||||
|
"password": "replace_me_password",
|
||||||
|
"ssl": false
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
val updatedConfig = PostgresConfigUpdater().update(configJson)
|
||||||
|
val spec = Jsons.readValue(updatedConfig, PostgresSpecificationOss::class.java)
|
||||||
|
return PostgresConfigurationFactory().makeWithoutExceptionHandling(spec)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.integrations.destination.postgres.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.component.ColumnType
|
||||||
|
import io.airbyte.cdk.load.component.TableOperationsFixtures
|
||||||
|
import io.airbyte.cdk.load.component.TableSchema
|
||||||
|
|
||||||
|
object PostgresComponentTestFixtures {
|
||||||
|
// PostgreSQL uses lowercase column names by default (no transformation needed)
|
||||||
|
val testMapping = TableOperationsFixtures.TEST_MAPPING
|
||||||
|
val idAndTestMapping = TableOperationsFixtures.ID_AND_TEST_MAPPING
|
||||||
|
val idTestWithCdcMapping = TableOperationsFixtures.ID_TEST_WITH_CDC_MAPPING
|
||||||
|
|
||||||
|
val allTypesTableSchema =
|
||||||
|
TableSchema(
|
||||||
|
mapOf(
|
||||||
|
"string" to ColumnType("varchar", true),
|
||||||
|
"boolean" to ColumnType("boolean", true),
|
||||||
|
"integer" to ColumnType("bigint", true),
|
||||||
|
"number" to ColumnType("decimal", true),
|
||||||
|
"date" to ColumnType("date", true),
|
||||||
|
"timestamp_tz" to ColumnType("timestamp with time zone", true),
|
||||||
|
"timestamp_ntz" to ColumnType("timestamp", true),
|
||||||
|
"time_tz" to ColumnType("time with time zone", true),
|
||||||
|
"time_ntz" to ColumnType("time", true),
|
||||||
|
"array" to ColumnType("jsonb", true),
|
||||||
|
"object" to ColumnType("jsonb", true),
|
||||||
|
"unknown" to ColumnType("jsonb", true),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
val allTypesColumnNameMapping = TableOperationsFixtures.ALL_TYPES_MAPPING
|
||||||
|
}
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.integrations.destination.postgres.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.component.TableOperationsFixtures
|
||||||
|
import io.airbyte.cdk.load.component.TableOperationsSuite
|
||||||
|
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||||
|
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.idTestWithCdcMapping
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.testMapping
|
||||||
|
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||||
|
import jakarta.inject.Inject
|
||||||
|
import org.junit.jupiter.api.Disabled
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
@MicronautTest(environments = ["component"])
|
||||||
|
class PostgresTableOperationsTest(
|
||||||
|
override val client: PostgresAirbyteClient,
|
||||||
|
override val testClient: PostgresTestTableOperationsClient,
|
||||||
|
) : TableOperationsSuite {
|
||||||
|
|
||||||
|
@Inject override lateinit var schemaFactory: TableSchemaFactory
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `connect to database`() {
|
||||||
|
super.`connect to database`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `create and drop namespaces`() {
|
||||||
|
super.`create and drop namespaces`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `create and drop tables`() {
|
||||||
|
super.`create and drop tables`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `insert records`() {
|
||||||
|
super.`insert records`(
|
||||||
|
inputRecords = TableOperationsFixtures.SINGLE_TEST_RECORD_INPUT,
|
||||||
|
expectedRecords = TableOperationsFixtures.SINGLE_TEST_RECORD_EXPECTED,
|
||||||
|
columnNameMapping = testMapping,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `count table rows`() {
|
||||||
|
super.`count table rows`(columnNameMapping = testMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `overwrite tables`() {
|
||||||
|
super.`overwrite tables`(
|
||||||
|
sourceInputRecords = TableOperationsFixtures.OVERWRITE_SOURCE_RECORDS,
|
||||||
|
targetInputRecords = TableOperationsFixtures.OVERWRITE_TARGET_RECORDS,
|
||||||
|
expectedRecords = TableOperationsFixtures.OVERWRITE_EXPECTED_RECORDS,
|
||||||
|
columnNameMapping = testMapping,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `copy tables`() {
|
||||||
|
super.`copy tables`(
|
||||||
|
sourceInputRecords = TableOperationsFixtures.OVERWRITE_SOURCE_RECORDS,
|
||||||
|
targetInputRecords = TableOperationsFixtures.OVERWRITE_TARGET_RECORDS,
|
||||||
|
expectedRecords = TableOperationsFixtures.COPY_EXPECTED_RECORDS,
|
||||||
|
columnNameMapping = testMapping,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `get generation id`() {
|
||||||
|
super.`get generation id`(columnNameMapping = testMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Re-enable when CDK TableOperationsSuite is fixed to use ID_AND_TEST_SCHEMA for target
|
||||||
|
// table instead of TEST_INTEGER_SCHEMA (the Dedupe mode requires the id column as primary key)
|
||||||
|
@Disabled("CDK TableOperationsSuite bug: target table schema missing 'id' column for Dedupe")
|
||||||
|
@Test
|
||||||
|
override fun `upsert tables`() {
|
||||||
|
super.`upsert tables`(
|
||||||
|
sourceInputRecords = TableOperationsFixtures.UPSERT_SOURCE_RECORDS,
|
||||||
|
targetInputRecords = TableOperationsFixtures.UPSERT_TARGET_RECORDS,
|
||||||
|
expectedRecords = TableOperationsFixtures.UPSERT_EXPECTED_RECORDS,
|
||||||
|
columnNameMapping = idTestWithCdcMapping,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,111 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.integrations.destination.postgres.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.command.ImportType
|
||||||
|
import io.airbyte.cdk.load.component.TableSchemaEvolutionFixtures
|
||||||
|
import io.airbyte.cdk.load.component.TableSchemaEvolutionSuite
|
||||||
|
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||||
|
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.allTypesColumnNameMapping
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.allTypesTableSchema
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.idAndTestMapping
|
||||||
|
import io.airbyte.integrations.destination.postgres.component.PostgresComponentTestFixtures.testMapping
|
||||||
|
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||||
|
import org.junit.jupiter.api.Test
|
||||||
|
|
||||||
|
@MicronautTest(environments = ["component"], resolveParameters = false)
|
||||||
|
class PostgresTableSchemaEvolutionTest(
|
||||||
|
override val client: PostgresAirbyteClient,
|
||||||
|
override val opsClient: PostgresAirbyteClient,
|
||||||
|
override val testClient: PostgresTestTableOperationsClient,
|
||||||
|
override val schemaFactory: TableSchemaFactory,
|
||||||
|
) : TableSchemaEvolutionSuite {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `discover recognizes all data types`() {
|
||||||
|
super.`discover recognizes all data types`(allTypesTableSchema, allTypesColumnNameMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `computeSchema handles all data types`() {
|
||||||
|
super.`computeSchema handles all data types`(allTypesTableSchema, allTypesColumnNameMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `noop diff`() {
|
||||||
|
super.`noop diff`(testMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `changeset is correct when adding a column`() {
|
||||||
|
super.`changeset is correct when adding a column`(testMapping, idAndTestMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `changeset is correct when dropping a column`() {
|
||||||
|
super.`changeset is correct when dropping a column`(idAndTestMapping, testMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `changeset is correct when changing a column's type`() {
|
||||||
|
super.`changeset is correct when changing a column's type`(testMapping)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `apply changeset - handle sync mode append`() {
|
||||||
|
super.`apply changeset - handle sync mode append`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `apply changeset - handle changing sync mode from append to dedup`() {
|
||||||
|
super.`apply changeset - handle changing sync mode from append to dedup`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `apply changeset - handle changing sync mode from dedup to append`() {
|
||||||
|
super.`apply changeset - handle changing sync mode from dedup to append`()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `apply changeset - handle sync mode dedup`() {
|
||||||
|
super.`apply changeset - handle sync mode dedup`()
|
||||||
|
}
|
||||||
|
|
||||||
|
override fun `apply changeset`(
|
||||||
|
initialStreamImportType: ImportType,
|
||||||
|
modifiedStreamImportType: ImportType,
|
||||||
|
) {
|
||||||
|
super.`apply changeset`(
|
||||||
|
initialColumnNameMapping =
|
||||||
|
TableSchemaEvolutionFixtures.APPLY_CHANGESET_INITIAL_COLUMN_MAPPING,
|
||||||
|
modifiedColumnNameMapping =
|
||||||
|
TableSchemaEvolutionFixtures.APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING,
|
||||||
|
TableSchemaEvolutionFixtures.APPLY_CHANGESET_EXPECTED_EXTRACTED_AT,
|
||||||
|
initialStreamImportType,
|
||||||
|
modifiedStreamImportType,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `change from string type to unknown type`() {
|
||||||
|
super.`change from string type to unknown type`(
|
||||||
|
idAndTestMapping,
|
||||||
|
idAndTestMapping,
|
||||||
|
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS,
|
||||||
|
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
override fun `change from unknown type to string type`() {
|
||||||
|
super.`change from unknown type to string type`(
|
||||||
|
idAndTestMapping,
|
||||||
|
idAndTestMapping,
|
||||||
|
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS,
|
||||||
|
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,257 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package io.airbyte.integrations.destination.postgres.component
|
||||||
|
|
||||||
|
import io.airbyte.cdk.load.component.TestTableOperationsClient
|
||||||
|
import io.airbyte.cdk.load.data.AirbyteValue
|
||||||
|
import io.airbyte.cdk.load.schema.model.TableName
|
||||||
|
import io.airbyte.cdk.load.util.Jsons
|
||||||
|
import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
|
||||||
|
import io.micronaut.context.annotation.Requires
|
||||||
|
import jakarta.inject.Singleton
|
||||||
|
import java.time.OffsetDateTime
|
||||||
|
import java.time.ZoneOffset
|
||||||
|
import java.time.format.DateTimeFormatter
|
||||||
|
import javax.sql.DataSource
|
||||||
|
|
||||||
|
@Requires(env = ["component"])
|
||||||
|
@Singleton
|
||||||
|
class PostgresTestTableOperationsClient(
|
||||||
|
private val dataSource: DataSource,
|
||||||
|
private val client: PostgresAirbyteClient,
|
||||||
|
) : TestTableOperationsClient {
|
||||||
|
override suspend fun ping() {
|
||||||
|
dataSource.connection.use { connection ->
|
||||||
|
connection.createStatement().use { statement -> statement.executeQuery("SELECT 1") }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun dropNamespace(namespace: String) {
|
||||||
|
dataSource.connection.use { connection ->
|
||||||
|
connection.createStatement().use { statement ->
|
||||||
|
statement.execute("DROP SCHEMA IF EXISTS \"$namespace\" CASCADE")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun insertRecords(table: TableName, records: List<Map<String, AirbyteValue>>) {
|
||||||
|
if (records.isEmpty()) return
|
||||||
|
|
||||||
|
// Get column types from database to handle jsonb columns properly
|
||||||
|
val columnTypes = getColumnTypes(table)
|
||||||
|
|
||||||
|
// Get all unique columns from ALL records to handle sparse data (e.g., CDC deletion column)
|
||||||
|
val columns = records.flatMap { it.keys }.distinct().toList()
|
||||||
|
val columnNames = columns.joinToString(", ") { "\"$it\"" }
|
||||||
|
val placeholders = columns.indices.joinToString(", ") { "?" }
|
||||||
|
|
||||||
|
val sql =
|
||||||
|
"""
|
||||||
|
INSERT INTO "${table.namespace}"."${table.name}" ($columnNames)
|
||||||
|
VALUES ($placeholders)
|
||||||
|
"""
|
||||||
|
|
||||||
|
dataSource.connection.use { connection ->
|
||||||
|
connection.prepareStatement(sql).use { statement ->
|
||||||
|
for (record in records) {
|
||||||
|
columns.forEachIndexed { index, column ->
|
||||||
|
val value = record[column]
|
||||||
|
val columnType = columnTypes[column]
|
||||||
|
setParameterValue(statement, index + 1, value, columnType)
|
||||||
|
}
|
||||||
|
statement.addBatch()
|
||||||
|
}
|
||||||
|
statement.executeBatch()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun getColumnTypes(table: TableName): Map<String, String> {
|
||||||
|
val columnTypes = mutableMapOf<String, String>()
|
||||||
|
dataSource.connection.use { connection ->
|
||||||
|
connection.createStatement().use { statement ->
|
||||||
|
statement
|
||||||
|
.executeQuery(
|
||||||
|
"""
|
||||||
|
SELECT column_name, data_type
|
||||||
|
FROM information_schema.columns
|
||||||
|
WHERE table_schema = '${table.namespace}'
|
||||||
|
AND table_name = '${table.name}'
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
.use { resultSet ->
|
||||||
|
while (resultSet.next()) {
|
||||||
|
columnTypes[resultSet.getString("column_name")] =
|
||||||
|
resultSet.getString("data_type")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return columnTypes
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun setParameterValue(
|
||||||
|
statement: java.sql.PreparedStatement,
|
||||||
|
index: Int,
|
||||||
|
value: AirbyteValue?,
|
||||||
|
columnType: String?
|
||||||
|
) {
|
||||||
|
// If column is jsonb, serialize any value as JSON
|
||||||
|
if (columnType == "jsonb") {
|
||||||
|
if (value == null || value is io.airbyte.cdk.load.data.NullValue) {
|
||||||
|
statement.setNull(index, java.sql.Types.OTHER)
|
||||||
|
} else {
|
||||||
|
val pgObject = org.postgresql.util.PGobject()
|
||||||
|
pgObject.type = "jsonb"
|
||||||
|
pgObject.value = serializeToJson(value)
|
||||||
|
statement.setObject(index, pgObject)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
when (value) {
|
||||||
|
null,
|
||||||
|
is io.airbyte.cdk.load.data.NullValue -> statement.setNull(index, java.sql.Types.NULL)
|
||||||
|
is io.airbyte.cdk.load.data.StringValue -> statement.setString(index, value.value)
|
||||||
|
is io.airbyte.cdk.load.data.IntegerValue ->
|
||||||
|
statement.setLong(index, value.value.toLong())
|
||||||
|
is io.airbyte.cdk.load.data.NumberValue -> statement.setBigDecimal(index, value.value)
|
||||||
|
is io.airbyte.cdk.load.data.BooleanValue -> statement.setBoolean(index, value.value)
|
||||||
|
is io.airbyte.cdk.load.data.TimestampWithTimezoneValue -> {
|
||||||
|
val offsetDateTime = OffsetDateTime.parse(value.value.toString())
|
||||||
|
statement.setObject(index, offsetDateTime)
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue -> {
|
||||||
|
val localDateTime = java.time.LocalDateTime.parse(value.value.toString())
|
||||||
|
statement.setObject(index, localDateTime)
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.DateValue -> {
|
||||||
|
val localDate = java.time.LocalDate.parse(value.value.toString())
|
||||||
|
statement.setObject(index, localDate)
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.TimeWithTimezoneValue -> {
|
||||||
|
statement.setString(index, value.value.toString())
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.TimeWithoutTimezoneValue -> {
|
||||||
|
val localTime = java.time.LocalTime.parse(value.value.toString())
|
||||||
|
statement.setObject(index, localTime)
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.ObjectValue -> {
|
||||||
|
val pgObject = org.postgresql.util.PGobject()
|
||||||
|
pgObject.type = "jsonb"
|
||||||
|
pgObject.value = Jsons.writeValueAsString(value.values)
|
||||||
|
statement.setObject(index, pgObject)
|
||||||
|
}
|
||||||
|
is io.airbyte.cdk.load.data.ArrayValue -> {
|
||||||
|
val pgObject = org.postgresql.util.PGobject()
|
||||||
|
pgObject.type = "jsonb"
|
||||||
|
pgObject.value = Jsons.writeValueAsString(value.values)
|
||||||
|
statement.setObject(index, pgObject)
|
||||||
|
}
|
||||||
|
else -> {
|
||||||
|
// For unknown types, try to serialize as string
|
||||||
|
statement.setString(index, value.toString())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun serializeToJson(value: AirbyteValue): String {
|
||||||
|
return when (value) {
|
||||||
|
is io.airbyte.cdk.load.data.StringValue -> Jsons.writeValueAsString(value.value)
|
||||||
|
is io.airbyte.cdk.load.data.IntegerValue -> value.value.toString()
|
||||||
|
is io.airbyte.cdk.load.data.NumberValue -> value.value.toString()
|
||||||
|
is io.airbyte.cdk.load.data.BooleanValue -> value.value.toString()
|
||||||
|
is io.airbyte.cdk.load.data.ObjectValue -> Jsons.writeValueAsString(value.values)
|
||||||
|
is io.airbyte.cdk.load.data.ArrayValue -> Jsons.writeValueAsString(value.values)
|
||||||
|
is io.airbyte.cdk.load.data.NullValue -> "null"
|
||||||
|
else -> Jsons.writeValueAsString(value.toString())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
override suspend fun readTable(table: TableName): List<Map<String, Any>> {
|
||||||
|
dataSource.connection.use { connection ->
|
||||||
|
connection.createStatement().use { statement ->
|
||||||
|
statement
|
||||||
|
.executeQuery("""SELECT * FROM "${table.namespace}"."${table.name}"""")
|
||||||
|
.use { resultSet ->
|
||||||
|
val metaData = resultSet.metaData
|
||||||
|
val columnCount = metaData.columnCount
|
||||||
|
val result = mutableListOf<Map<String, Any>>()
|
||||||
|
|
||||||
|
while (resultSet.next()) {
|
||||||
|
val row = mutableMapOf<String, Any>()
|
||||||
|
for (i in 1..columnCount) {
|
||||||
|
val columnName = metaData.getColumnName(i)
|
||||||
|
val columnType = metaData.getColumnTypeName(i)
|
||||||
|
when (columnType.lowercase()) {
|
||||||
|
"timestamptz" -> {
|
||||||
|
val value =
|
||||||
|
resultSet.getObject(i, OffsetDateTime::class.java)
|
||||||
|
if (value != null) {
|
||||||
|
val formattedTimestamp =
|
||||||
|
DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(
|
||||||
|
value.withOffsetSameInstant(ZoneOffset.UTC)
|
||||||
|
)
|
||||||
|
row[columnName] = formattedTimestamp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"timestamp" -> {
|
||||||
|
val value = resultSet.getTimestamp(i)
|
||||||
|
if (value != null) {
|
||||||
|
val localDateTime = value.toLocalDateTime()
|
||||||
|
row[columnName] =
|
||||||
|
DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(
|
||||||
|
localDateTime
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"jsonb",
|
||||||
|
"json" -> {
|
||||||
|
val stringValue: String? = resultSet.getString(i)
|
||||||
|
if (stringValue != null) {
|
||||||
|
val parsedValue =
|
||||||
|
Jsons.readValue(stringValue, Any::class.java)
|
||||||
|
val actualValue =
|
||||||
|
when (parsedValue) {
|
||||||
|
is Int -> parsedValue.toLong()
|
||||||
|
else -> parsedValue
|
||||||
|
}
|
||||||
|
row[columnName] = actualValue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else -> {
|
||||||
|
val value = resultSet.getObject(i)
|
||||||
|
if (value != null) {
|
||||||
|
// For varchar columns that may contain JSON (from
|
||||||
|
// schema evolution),
|
||||||
|
// normalize the JSON to compact format for comparison
|
||||||
|
if (
|
||||||
|
value is String &&
|
||||||
|
(value.startsWith("{") || value.startsWith("["))
|
||||||
|
) {
|
||||||
|
try {
|
||||||
|
val parsed =
|
||||||
|
Jsons.readValue(value, Any::class.java)
|
||||||
|
row[columnName] =
|
||||||
|
Jsons.writeValueAsString(parsed)
|
||||||
|
} catch (_: Exception) {
|
||||||
|
row[columnName] = value
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
row[columnName] = value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.add(row)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -31,7 +31,7 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
 import io.airbyte.cdk.load.data.UnknownType
 import io.airbyte.cdk.load.data.json.toAirbyteValue
 import io.airbyte.cdk.load.message.Meta
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
+import io.airbyte.cdk.load.table.TypingDedupingUtil
 import io.airbyte.cdk.load.test.util.DestinationDataDumper
 import io.airbyte.cdk.load.test.util.OutputRecord
 import io.airbyte.cdk.load.util.deserializeToNode
@@ -267,7 +267,7 @@ class PostgresRawDataDumper(
                 .lowercase()
                 .toPostgresCompatibleName()
 
-        val fullyQualifiedTableName = "$rawNamespace.$rawName"
+        val fullyQualifiedTableName = "\"$rawNamespace\".\"$rawName\""
 
         // Check if table exists first
         val tableExistsQuery =
@@ -302,6 +302,26 @@ class PostgresRawDataDumper(
                 false
             }
 
+            // Build the column name mapping from original names to transformed names
+            // We use the stream schema to get the original field names, then transform them
+            // using the postgres name transformation logic
+            val finalToInputColumnNames = mutableMapOf<String, String>()
+            if (stream.schema is ObjectType) {
+                val objectSchema = stream.schema as ObjectType
+                for (fieldName in objectSchema.properties.keys) {
+                    val transformedName = fieldName.toPostgresCompatibleName()
+                    // Map transformed name back to original name
+                    finalToInputColumnNames[transformedName] = fieldName
+                }
+            }
+            // Also check if inputToFinalColumnNames mapping is available
+            val inputToFinalColumnNames =
+                stream.tableSchema.columnSchema.inputToFinalColumnNames
+            // Add entries from the existing mapping (in case it was populated)
+            for ((input, final) in inputToFinalColumnNames) {
+                finalToInputColumnNames[final] = input
+            }
+
             while (resultSet.next()) {
                 val rawData =
                     if (hasDataColumn) {
@@ -313,8 +333,22 @@ class PostgresRawDataDumper(
                             else -> dataObject?.toString() ?: "{}"
                         }
 
-                        // Parse JSON to AirbyteValue, then coerce it to match the schema
-                        dataJson?.deserializeToNode()?.toAirbyteValue() ?: NullValue
+                        // Parse JSON to AirbyteValue, then map column names back to originals
+                        val parsedValue =
+                            dataJson?.deserializeToNode()?.toAirbyteValue() ?: NullValue
+                        // If the parsed value is an ObjectValue, map the column names back
+                        if (parsedValue is ObjectValue) {
+                            val mappedProperties = linkedMapOf<String, AirbyteValue>()
+                            for ((key, value) in parsedValue.values) {
+                                // Map final column name back to input column name if mapping
+                                // exists
+                                val originalKey = finalToInputColumnNames[key] ?: key
+                                mappedProperties[originalKey] = value
+                            }
+                            ObjectValue(mappedProperties)
+                        } else {
+                            parsedValue
+                        }
                     } else {
                         // Typed table mode: read from individual columns and reconstruct the
                         // object
@@ -333,10 +367,19 @@ class PostgresRawDataDumper(
 
                         for ((fieldName, fieldType) in objectSchema.properties) {
                             try {
+                                // Map input field name to the transformed final column name
+                                // First check the inputToFinalColumnNames mapping, then
+                                // fall
+                                // back to applying postgres transformation directly
+                                val transformedColumnName =
+                                    inputToFinalColumnNames[fieldName]
+                                        ?: fieldName.toPostgresCompatibleName()
+
                                 // Try to find the actual column name (case-insensitive
                                 // lookup)
                                 val actualColumnName =
-                                    columnMap[fieldName.lowercase()] ?: fieldName
+                                    columnMap[transformedColumnName.lowercase()]
+                                        ?: transformedColumnName
                                 val columnValue = resultSet.getObject(actualColumnName)
                                 properties[fieldName] =
                                     when (columnValue) {
@@ -5,8 +5,8 @@
 package io.airbyte.integrations.destination.postgres.check
 
 import io.airbyte.cdk.load.command.DestinationStream
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.mockk.coEvery
@@ -6,8 +6,8 @@ package io.airbyte.integrations.destination.postgres.client
 
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.airbyte.integrations.destination.postgres.sql.COUNT_TOTAL_ALIAS
 import io.airbyte.integrations.destination.postgres.sql.Column
@@ -12,9 +12,9 @@ import io.airbyte.cdk.load.data.IntegerType
 import io.airbyte.cdk.load.data.ObjectType
 import io.airbyte.cdk.load.data.StringType
 import io.airbyte.cdk.load.data.TimestampTypeWithTimezone
+import io.airbyte.cdk.load.schema.model.TableName
 import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
 import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
 import io.airbyte.integrations.destination.postgres.spec.CdcDeletionMode
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
 import io.mockk.every
@@ -5,18 +5,19 @@
 package io.airbyte.integrations.destination.postgres.write
 
 import io.airbyte.cdk.load.command.Dedupe
+import io.airbyte.cdk.load.command.DestinationCatalog
 import io.airbyte.cdk.load.command.DestinationStream
 import io.airbyte.cdk.load.command.ImportType
-import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
-import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableAppendStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableDedupStreamLoader
-import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableExecutionConfig
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
-import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableNameInfo
-import io.airbyte.cdk.load.table.ColumnNameMapping
-import io.airbyte.cdk.load.table.TableName
+import io.airbyte.cdk.load.schema.model.ColumnSchema
+import io.airbyte.cdk.load.schema.model.StreamTableSchema
+import io.airbyte.cdk.load.schema.model.TableName
+import io.airbyte.cdk.load.schema.model.TableNames
+import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
+import io.airbyte.cdk.load.table.TempTableNameGenerator
+import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
+import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
+import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
 import io.airbyte.cdk.load.write.StreamStateStore
 import io.airbyte.integrations.destination.postgres.client.PostgresAirbyteClient
 import io.airbyte.integrations.destination.postgres.spec.PostgresConfiguration
@@ -33,7 +34,7 @@ import org.junit.jupiter.api.Test
 class PostgresWriterTest {
 
     private lateinit var writer: PostgresWriter
-    private lateinit var names: TableCatalog
+    private lateinit var catalog: DestinationCatalog
     private lateinit var stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>
     private lateinit var streamStateStore: StreamStateStore<DirectLoadTableExecutionConfig>
     private lateinit var postgresClient: PostgresAirbyteClient
@@ -42,7 +43,7 @@ class PostgresWriterTest {
 
     @BeforeEach
     fun setup() {
-        names = mockk()
+        catalog = mockk()
         stateGatherer = mockk()
         streamStateStore = mockk()
         postgresClient = mockk()
@@ -51,7 +52,7 @@ class PostgresWriterTest {
 
         writer =
             PostgresWriter(
-                names,
+                catalog,
                 stateGatherer,
                 streamStateStore,
                 postgresClient,
@@ -66,27 +67,28 @@ class PostgresWriterTest {
 
         val stream = mockk<DestinationStream>()
         val finalTableName = TableName("ns", "name")
-        val mapping = mockk<ColumnNameMapping>(relaxed = true)
 
-        val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
-        every { tableNameInfo.tableNames.finalTableName } returns finalTableName
-        every { tableNameInfo.columnNameMapping } returns mapping
-        every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
-        every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
+        val tableNames = TableNames(finalTableName = finalTableName)
+        val columnSchema =
+            ColumnSchema(
+                inputSchema = emptyMap(),
+                inputToFinalColumnNames = emptyMap(),
+                finalSchema = emptyMap()
+            )
+        val importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
+        val tableSchema = StreamTableSchema(tableNames, columnSchema, importType)
 
-        every { stream.importType } returns Dedupe(primaryKey = emptyList(), cursor = emptyList())
+        every { stream.tableSchema } returns tableSchema
+        every { stream.importType } returns importType
         every { stream.minimumGenerationId } returns 0L
         every { stream.generationId } returns 1L
 
-        // Mock names map behavior
-        val namesMap = mapOf(stream to tableNameInfo)
-        every { names.values } returns namesMap.values
-        every { names[stream] } returns tableNameInfo
+        every { catalog.streams } returns listOf(stream)
 
         coEvery { postgresClient.createNamespace(any()) } just Runs
 
         val initialStatus = mockk<DirectLoadInitialStatus>()
-        coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
+        coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
 
         every { tempTableNameGenerator.generate(finalTableName) } returns
             TableName("ns", "temp_name")
@@ -103,27 +105,28 @@ class PostgresWriterTest {
|
|||||||
|
|
||||||
val stream = mockk<DestinationStream>()
|
val stream = mockk<DestinationStream>()
|
||||||
val finalTableName = TableName("ns", "name")
|
val finalTableName = TableName("ns", "name")
|
||||||
val mapping = mockk<ColumnNameMapping>(relaxed = true)
|
|
||||||
|
|
||||||
val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
|
val tableNames = TableNames(finalTableName = finalTableName)
|
||||||
every { tableNameInfo.tableNames.finalTableName } returns finalTableName
|
val columnSchema =
|
||||||
every { tableNameInfo.columnNameMapping } returns mapping
|
ColumnSchema(
|
||||||
every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
|
inputSchema = emptyMap(),
|
||||||
every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
|
inputToFinalColumnNames = emptyMap(),
|
||||||
|
finalSchema = emptyMap()
|
||||||
|
)
|
||||||
|
val importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
|
||||||
|
val tableSchema = StreamTableSchema(tableNames, columnSchema, importType)
|
||||||
|
|
||||||
every { stream.importType } returns Dedupe(primaryKey = emptyList(), cursor = emptyList())
|
every { stream.tableSchema } returns tableSchema
|
||||||
|
every { stream.importType } returns importType
|
||||||
every { stream.minimumGenerationId } returns 0L
|
every { stream.minimumGenerationId } returns 0L
|
||||||
every { stream.generationId } returns 1L
|
every { stream.generationId } returns 1L
|
||||||
|
|
||||||
// Mock names map behavior
|
every { catalog.streams } returns listOf(stream)
|
||||||
val namesMap = mapOf(stream to tableNameInfo)
|
|
||||||
every { names.values } returns namesMap.values
|
|
||||||
every { names[stream] } returns tableNameInfo
|
|
||||||
|
|
||||||
coEvery { postgresClient.createNamespace(any()) } just Runs
|
coEvery { postgresClient.createNamespace(any()) } just Runs
|
||||||
|
|
||||||
val initialStatus = mockk<DirectLoadInitialStatus>()
|
val initialStatus = mockk<DirectLoadInitialStatus>()
|
||||||
coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
|
coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
|
||||||
|
|
||||||
every { tempTableNameGenerator.generate(finalTableName) } returns
|
every { tempTableNameGenerator.generate(finalTableName) } returns
|
||||||
TableName("ns", "temp_name")
|
TableName("ns", "temp_name")
|
||||||
@@ -143,29 +146,29 @@ class PostgresWriterTest {
|
|||||||
|
|
||||||
val stream = mockk<DestinationStream>()
|
val stream = mockk<DestinationStream>()
|
||||||
val finalTableName = TableName("ns", "name")
|
val finalTableName = TableName("ns", "name")
|
||||||
val mapping = mockk<ColumnNameMapping>(relaxed = true)
|
|
||||||
|
|
||||||
val tableNameInfo = mockk<TableNameInfo>(relaxed = true)
|
|
||||||
every { tableNameInfo.tableNames.finalTableName } returns finalTableName
|
|
||||||
every { tableNameInfo.columnNameMapping } returns mapping
|
|
||||||
every { tableNameInfo.component1() } answers { tableNameInfo.tableNames }
|
|
||||||
every { tableNameInfo.component2() } answers { tableNameInfo.columnNameMapping }
|
|
||||||
|
|
||||||
|
val tableNames = TableNames(finalTableName = finalTableName)
|
||||||
|
val columnSchema =
|
||||||
|
ColumnSchema(
|
||||||
|
inputSchema = emptyMap(),
|
||||||
|
inputToFinalColumnNames = emptyMap(),
|
||||||
|
finalSchema = emptyMap()
|
||||||
|
)
|
||||||
// Use a mock for ImportType that is NOT Dedupe
|
// Use a mock for ImportType that is NOT Dedupe
|
||||||
val appendImportType = mockk<ImportType>()
|
val appendImportType = mockk<ImportType>()
|
||||||
|
val tableSchema = StreamTableSchema(tableNames, columnSchema, appendImportType)
|
||||||
|
|
||||||
|
every { stream.tableSchema } returns tableSchema
|
||||||
every { stream.importType } returns appendImportType
|
every { stream.importType } returns appendImportType
|
||||||
every { stream.minimumGenerationId } returns 0L
|
every { stream.minimumGenerationId } returns 0L
|
||||||
every { stream.generationId } returns 1L
|
every { stream.generationId } returns 1L
|
||||||
|
|
||||||
// Mock names map behavior
|
every { catalog.streams } returns listOf(stream)
|
||||||
val namesMap = mapOf(stream to tableNameInfo)
|
|
||||||
every { names.values } returns namesMap.values
|
|
||||||
every { names[stream] } returns tableNameInfo
|
|
||||||
|
|
||||||
coEvery { postgresClient.createNamespace(any()) } just Runs
|
coEvery { postgresClient.createNamespace(any()) } just Runs
|
||||||
|
|
||||||
val initialStatus = mockk<DirectLoadInitialStatus>()
|
val initialStatus = mockk<DirectLoadInitialStatus>()
|
||||||
coEvery { stateGatherer.gatherInitialStatus(names) } returns mapOf(stream to initialStatus)
|
coEvery { stateGatherer.gatherInitialStatus() } returns mapOf(stream to initialStatus)
|
||||||
|
|
||||||
every { tempTableNameGenerator.generate(finalTableName) } returns
|
every { tempTableNameGenerator.generate(finalTableName) } returns
|
||||||
TableName("ns", "temp_name")
|
TableName("ns", "temp_name")
|
||||||
|
|||||||
@@ -580,12 +580,37 @@ dynamic_streams:
 - table_id
 value: "{{ components_values.id }}"

+# Rate limits: https://airtable.com/developers/web/api/rate-limits
+# - 5 requests per second per base
+# - 50 requests per second per user/service account
+api_budget:
+type: HTTPAPIBudget
+policies:
+- type: MovingWindowCallRatePolicy
+rates:
+- limit: 5
+interval: PT1S
+matchers: [] # Applies to all endpoints
+status_codes_for_ratelimit_hit: [429]
+
+concurrency_level:
+type: ConcurrencyLevel
+default_concurrency: "{{ config.get('num_workers', 5) }}"
+max_concurrency: 40
+
 spec:
 type: Spec
 connection_specification:
 type: object
 $schema: http://json-schema.org/draft-07/schema#
 properties:
+num_workers:
+type: integer
+title: Number of Concurrent Workers
+description: Number of concurrent threads for syncing. Higher values can speed up syncs but may hit rate limits. Airtable limits to 5 requests per second per base.
+default: 5
+minimum: 2
+maximum: 40
 credentials:
 title: Authentication
 type: object
@@ -11,7 +11,7 @@ data:
 connectorSubtype: api
 connectorType: source
 definitionId: 14c6e7ea-97ed-4f5e-a7b5-25e9a80b8212
-dockerImageTag: 4.6.15
+dockerImageTag: 4.6.16-rc.1
 dockerRepository: airbyte/source-airtable
 documentationUrl: https://docs.airbyte.com/integrations/sources/airtable
 externalDocumentationUrls:
@@ -53,7 +53,7 @@ data:
 message: This release introduces changes to columns with formula to parse values directly from `array` to `string` or `number` (where it is possible). Users should refresh the source schema and reset affected streams after upgrading to ensure uninterrupted syncs.
 upgradeDeadline: "2023-10-23"
 rolloutConfiguration:
-enableProgressiveRollout: false
+enableProgressiveRollout: true
 supportLevel: certified
 tags:
 - language:manifest-only
@@ -3487,7 +3487,7 @@ spec:
 type: integer
 title: Number of concurrent workers
 minimum: 2
-maximum: 10
+maximum: 20
 default: 10
 examples:
 - 2
@@ -3547,7 +3547,7 @@ spec:
 concurrency_level:
 type: ConcurrencyLevel
 default_concurrency: "{{ config.get('num_workers', 10) }}"
-max_concurrency: 10
+max_concurrency: 20

 schemas:
 attribution_report_performance_adgroup:
@@ -13,7 +13,7 @@ data:
 connectorSubtype: api
 connectorType: source
 definitionId: c6b0a29e-1da9-4512-9002-7bfd0cba2246
-dockerImageTag: 7.3.8
+dockerImageTag: 7.3.9
 dockerRepository: airbyte/source-amazon-ads
 documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-ads
 githubIssueLabel: source-amazon-ads
@@ -854,7 +854,6 @@ definitions:
 name: ListFinancialEventGroups
 primary_key:
 - FinancialEventGroupId
-ignore_stream_slicer_parameters_on_paginated_requests: false
 incremental_sync:
 type: DatetimeBasedCursor
 cursor_field: FinancialEventGroupStart
@@ -881,6 +880,7 @@ definitions:
 cursor_granularity: "PT1S"
 retriever:
 type: SimpleRetriever
+ignore_stream_slicer_parameters_on_paginated_requests: true
 requester:
 $ref: "#/definitions/base_requester"
 path: "finances/v0/financialEventGroups"
@@ -929,7 +929,6 @@ definitions:
 type: DeclarativeStream
 name: ListFinancialEvents
 primary_key: []
-ignore_stream_slicer_parameters_on_paginated_requests: false
 incremental_sync:
 type: DatetimeBasedCursor
 cursor_field: PostedBefore
@@ -961,6 +960,7 @@ definitions:
 value: "{{ stream_slice['end_time'] }}"
 retriever:
 type: SimpleRetriever
+ignore_stream_slicer_parameters_on_paginated_requests: true
 requester:
 $ref: "#/definitions/base_requester"
 path: "finances/v0/financialEvents"
@@ -15,7 +15,7 @@ data:
 connectorSubtype: api
 connectorType: source
 definitionId: e55879a8-0ef8-4557-abcf-ab34c53ec460
-dockerImageTag: 5.0.0
+dockerImageTag: 5.0.1
 dockerRepository: airbyte/source-amazon-seller-partner
 documentationUrl: https://docs.airbyte.com/integrations/sources/amazon-seller-partner
 erdUrl: https://dbdocs.io/airbyteio/source-amazon-seller-partner?view=relationships
@@ -28,7 +28,7 @@ data:
 connectorSubtype: api
 connectorType: source
 definitionId: d0243522-dccf-4978-8ba0-37ed47a0bdbf
-dockerImageTag: 1.5.0
+dockerImageTag: 1.5.1
 dockerRepository: airbyte/source-asana
 githubIssueLabel: source-asana
 icon: asana.svg
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
 build-backend = "poetry.core.masonry.api"

 [tool.poetry]
-version = "1.5.0"
+version = "1.5.1"
 name = "source-asana"
 description = "Source implementation for asana."
 authors = [ "Airbyte <contact@airbyte.io>",]
@@ -12,7 +12,8 @@ readme = "README.md"
 documentation = "https://docs.airbyte.com/integrations/sources/asana"
 homepage = "https://airbyte.com"
 repository = "https://github.com/airbytehq/airbyte"
-packages = [ { include = "source_asana" }, {include = "main.py" } ]
+[[tool.poetry.packages]]
+include = "source_asana"

 [tool.poetry.dependencies]
 python = "^3.10,<3.12"
@@ -2641,6 +2641,8 @@ spec:
 title: Organization Export IDs
 description: Globally unique identifiers for the organization exports
 type: array
+items:
+type: string
 num_workers:
 type: integer
 title: Number of concurrent workers
@@ -1,3 +1,46 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.

+import os
+import sys
+from pathlib import Path
+from typing import Any, Mapping
+
+from pytest import fixture
+
+from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
+from airbyte_cdk.test.catalog_builder import CatalogBuilder
+from airbyte_cdk.test.state_builder import StateBuilder
+
+
 pytest_plugins = ["airbyte_cdk.test.utils.manifest_only_fixtures"]
+os.environ["REQUEST_CACHE_PATH"] = "REQUEST_CACHE_PATH"
+
+
+def _get_manifest_path() -> Path:
+    """Get path to manifest.yaml, handling both CI and local environments."""
+    ci_path = Path("/airbyte/integration_code/source_declarative_manifest")
+    if ci_path.exists():
+        return ci_path
+    return Path(__file__).parent.parent
+
+
+_SOURCE_FOLDER_PATH = _get_manifest_path()
+_YAML_FILE_PATH = _SOURCE_FOLDER_PATH / "manifest.yaml"
+sys.path.append(str(_SOURCE_FOLDER_PATH))
+
+
+def get_source(config: Mapping[str, Any], state=None) -> YamlDeclarativeSource:
+    """Create a YamlDeclarativeSource instance with the given config."""
+    catalog = CatalogBuilder().build()
+    state = StateBuilder().build() if not state else state
+    return YamlDeclarativeSource(path_to_yaml=str(_YAML_FILE_PATH), catalog=catalog, config=config, state=state)
+
+
+@fixture(autouse=True)
+def clear_cache_before_each_test():
+    """CRITICAL: Clear HTTP request cache between tests to ensure isolation."""
+    cache_dir = Path(os.getenv("REQUEST_CACHE_PATH"))
+    if cache_dir.exists() and cache_dir.is_dir():
+        for file_path in cache_dir.glob("*.sqlite"):
+            file_path.unlink()
+    yield
@@ -0,0 +1 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
@@ -0,0 +1,43 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from __future__ import annotations
+
+from typing import Any, MutableMapping
+
+
+# Constants for test data - match connector's spec
+SITE_API_KEY = "test_api_key_12345"
+SITE = "test-site"
+START_DATE = "2024-01-01T00:00:00Z"
+PRODUCT_CATALOG = "2.0"
+
+
+class ConfigBuilder:
+    """Builder for creating test configurations matching connector spec."""
+
+    def __init__(self) -> None:
+        self._config: MutableMapping[str, Any] = {
+            "site_api_key": SITE_API_KEY,
+            "site": SITE,
+            "start_date": START_DATE,
+            "product_catalog": PRODUCT_CATALOG,
+        }
+
+    def with_site_api_key(self, site_api_key: str) -> "ConfigBuilder":
+        self._config["site_api_key"] = site_api_key
+        return self
+
+    def with_site(self, site: str) -> "ConfigBuilder":
+        self._config["site"] = site
+        return self
+
+    def with_start_date(self, start_date: str) -> "ConfigBuilder":
+        self._config["start_date"] = start_date
+        return self
+
+    def with_product_catalog(self, product_catalog: str) -> "ConfigBuilder":
+        self._config["product_catalog"] = product_catalog
+        return self
+
+    def build(self) -> MutableMapping[str, Any]:
+        return self._config
@@ -0,0 +1,183 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from __future__ import annotations
+
+from typing import Any, Dict, Optional
+
+from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS, HttpRequest
+
+from .config import SITE
+
+
+# Must match manifest.yaml base URL exactly
+API_BASE_URL = f"https://{SITE}.chargebee.com/api/v2"
+
+
+class RequestBuilder:
+    """Builder for creating HttpRequest objects for testing."""
+
+    @classmethod
+    def endpoint(cls, resource: str) -> "RequestBuilder":
+        return cls(resource)
+
+    @classmethod
+    def customers_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="customers")
+
+    @classmethod
+    def customer_contacts_endpoint(cls, customer_id: str) -> "RequestBuilder":
+        return cls(resource=f"customers/{customer_id}/contacts")
+
+    @classmethod
+    def subscriptions_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="subscriptions")
+
+    @classmethod
+    def invoices_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="invoices")
+
+    @classmethod
+    def events_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="events")
+
+    @classmethod
+    def transactions_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="transactions")
+
+    @classmethod
+    def plans_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="plans")
+
+    @classmethod
+    def addons_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="addons")
+
+    @classmethod
+    def coupons_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="coupons")
+
+    @classmethod
+    def items_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="items")
+
+    @classmethod
+    def item_attached_items_endpoint(cls, item_id: str) -> "RequestBuilder":
+        return cls(resource=f"items/{item_id}/attached_items")
+
+    @classmethod
+    def gifts_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="gifts")
+
+    @classmethod
+    def credit_notes_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="credit_notes")
+
+    @classmethod
+    def orders_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="orders")
+
+    @classmethod
+    def hosted_pages_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="hosted_pages")
+
+    @classmethod
+    def item_prices_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="item_prices")
+
+    @classmethod
+    def payment_sources_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="payment_sources")
+
+    @classmethod
+    def promotional_credits_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="promotional_credits")
+
+    @classmethod
+    def subscription_scheduled_changes_endpoint(cls, subscription_id: str) -> "RequestBuilder":
+        return cls(resource=f"subscriptions/{subscription_id}/retrieve_with_scheduled_changes")
+
+    @classmethod
+    def unbilled_charges_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="unbilled_charges")
+
+    @classmethod
+    def virtual_bank_accounts_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="virtual_bank_accounts")
+
+    @classmethod
+    def quotes_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="quotes")
+
+    @classmethod
+    def quote_line_groups_endpoint(cls, quote_id: str) -> "RequestBuilder":
+        return cls(resource=f"quotes/{quote_id}/quote_line_groups")
+
+    @classmethod
+    def site_migration_details_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="site_migration_details")
+
+    @classmethod
+    def comments_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="comments")
+
+    @classmethod
+    def item_families_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="item_families")
+
+    @classmethod
+    def differential_prices_endpoint(cls) -> "RequestBuilder":
+        return cls(resource="differential_prices")
+
+    def __init__(self, resource: str = "") -> None:
+        self._resource = resource
+        self._query_params: Dict[str, Any] = {}
+        self._any_query_params = False
+
+    def with_query_param(self, key: str, value: Any) -> "RequestBuilder":
+        self._query_params[key] = value
+        return self
+
+    def with_limit(self, limit: int) -> "RequestBuilder":
+        self._query_params["limit"] = str(limit)
+        return self
+
+    def with_offset(self, offset: str) -> "RequestBuilder":
+        self._query_params["offset"] = offset
+        return self
+
+    def with_any_query_params(self) -> "RequestBuilder":
+        """Use for endpoints with dynamic query params."""
+        self._any_query_params = True
+        return self
+
+    def with_sort_by_asc(self, field: str) -> "RequestBuilder":
+        """Add sort_by[asc] parameter."""
+        self._query_params["sort_by[asc]"] = field
+        return self
+
+    def with_include_deleted(self, value: str = "true") -> "RequestBuilder":
+        """Add include_deleted parameter."""
+        self._query_params["include_deleted"] = value
+        return self
+
+    def with_updated_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
+        """Add updated_at[between] parameter for incremental streams."""
+        self._query_params["updated_at[between]"] = f"[{start_time}, {end_time}]"
+        return self
+
+    def with_occurred_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
+        """Add occurred_at[between] parameter for event stream."""
+        self._query_params["occurred_at[between]"] = f"[{start_time}, {end_time}]"
+        return self
+
+    def with_created_at_between(self, start_time: int, end_time: int) -> "RequestBuilder":
+        """Add created_at[between] parameter for comment and promotional_credit streams."""
+        self._query_params["created_at[between]"] = f"[{start_time}, {end_time}]"
+        return self
+
+    def build(self) -> HttpRequest:
+        query_params = ANY_QUERY_PARAMS if self._any_query_params else (self._query_params if self._query_params else None)
+        return HttpRequest(
+            url=f"{API_BASE_URL}/{self._resource}",
+            query_params=query_params,
+        )
@@ -0,0 +1,233 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from http import HTTPStatus
+from pathlib import Path
+
+from airbyte_cdk.test.mock_http import HttpResponse
+
+
+def _get_response_path() -> Path:
+    """Get path to response JSON files."""
+    return Path(__file__).parent.parent / "resource" / "http" / "response"
+
+
+def get_json_response(filename: str) -> str:
+    """Load a JSON response from the resource directory."""
+    response_path = _get_response_path() / filename
+    return response_path.read_text()
+
+
+def json_response(filename: str, status_code: HTTPStatus = HTTPStatus.OK) -> HttpResponse:
+    """Create an HttpResponse from a JSON file."""
+    body = get_json_response(filename)
+    return HttpResponse(body=body, status_code=status_code.value, headers={})
+
+
+def customer_response() -> HttpResponse:
+    """Customer stream response."""
+    return json_response("customer.json")
+
+
+def customer_response_page1() -> HttpResponse:
+    """Customer stream response - page 1 with next_offset."""
+    return json_response("customer_page1.json")
+
+
+def customer_response_page2() -> HttpResponse:
+    """Customer stream response - page 2 (last page)."""
+    return json_response("customer_page2.json")
+
+
+def customer_response_multiple() -> HttpResponse:
+    """Customer stream response with multiple records."""
+    return json_response("customer_multiple.json")
+
+
+def subscription_response() -> HttpResponse:
+    """Subscription stream response."""
+    return json_response("subscription.json")
+
+
+def subscription_response_page1() -> HttpResponse:
+    """Subscription stream response - page 1 with next_offset."""
+    return json_response("subscription_page1.json")
+
+
+def subscription_response_page2() -> HttpResponse:
+    """Subscription stream response - page 2 (last page)."""
+    return json_response("subscription_page2.json")
+
+
+def invoice_response() -> HttpResponse:
+    """Invoice stream response."""
+    return json_response("invoice.json")
+
+
+def event_response() -> HttpResponse:
+    """Event stream response."""
+    return json_response("event.json")
+
+
+def event_response_page1() -> HttpResponse:
+    """Event stream response - page 1 with next_offset."""
+    return json_response("event_page1.json")
+
+
+def event_response_page2() -> HttpResponse:
+    """Event stream response - page 2 (last page)."""
+    return json_response("event_page2.json")
+
+
+def transaction_response() -> HttpResponse:
+    """Transaction stream response."""
+    return json_response("transaction.json")
+
+
+def plan_response() -> HttpResponse:
+    """Plan stream response."""
+    return json_response("plan.json")
+
+
+def addon_response() -> HttpResponse:
+    """Addon stream response."""
+    return json_response("addon.json")
+
+
+def coupon_response() -> HttpResponse:
+    """Coupon stream response."""
+    return json_response("coupon.json")
+
+
+def credit_note_response() -> HttpResponse:
+    """Credit note stream response."""
+    return json_response("credit_note.json")
+
+
+def gift_response() -> HttpResponse:
+    """Gift stream response."""
+    return json_response("gift.json")
+
+
+def item_response() -> HttpResponse:
+    """Item stream response."""
+    return json_response("item.json")
+
+
+def item_response_multiple() -> HttpResponse:
+    """Item stream response with multiple records."""
+    return json_response("item_multiple.json")
+
+
+def contact_response() -> HttpResponse:
+    """Contact stream response (substream of customer)."""
+    return json_response("contact.json")
+
+
+def attached_item_response() -> HttpResponse:
+    """Attached item stream response (substream of item)."""
+    return json_response("attached_item.json")
+
+
+def empty_response() -> HttpResponse:
+    """Empty response with no records."""
+    return json_response("empty.json")
+
+
+def error_response(status_code: HTTPStatus = HTTPStatus.UNAUTHORIZED) -> HttpResponse:
+    """Error response for testing error handling."""
+    error_files = {
+        HTTPStatus.UNAUTHORIZED: "error_unauthorized.json",
+        HTTPStatus.NOT_FOUND: "error_not_found.json",
+    }
+    filename = error_files.get(status_code, "error_unauthorized.json")
+    return json_response(filename, status_code)
+
+
+def configuration_incompatible_response() -> HttpResponse:
+    """Response for configuration_incompatible error (IGNORE action)."""
+    return json_response("error_configuration_incompatible.json", HTTPStatus.BAD_REQUEST)
+
+
+def order_response() -> HttpResponse:
+    """Order stream response."""
+    return json_response("order.json")
+
+
+def hosted_page_response() -> HttpResponse:
+    """Hosted page stream response."""
+    return json_response("hosted_page.json")
+
+
+def item_price_response() -> HttpResponse:
+    """Item price stream response."""
+    return json_response("item_price.json")
+
+
+def payment_source_response() -> HttpResponse:
+    """Payment source stream response."""
+    return json_response("payment_source.json")
+
+
+def promotional_credit_response() -> HttpResponse:
+    """Promotional credit stream response."""
+    return json_response("promotional_credit.json")
+
+
+def subscription_response_multiple() -> HttpResponse:
+    """Subscription stream response with multiple records."""
+    return json_response("subscription_multiple.json")
+
+
+def subscription_with_scheduled_changes_response() -> HttpResponse:
+    """Subscription with scheduled changes stream response."""
+    return json_response("subscription_with_scheduled_changes.json")
+
+
+def unbilled_charge_response() -> HttpResponse:
+    """Unbilled charge stream response."""
+    return json_response("unbilled_charge.json")
+
+
+def virtual_bank_account_response() -> HttpResponse:
+    """Virtual bank account stream response."""
+    return json_response("virtual_bank_account.json")
+
+
+def quote_response() -> HttpResponse:
+    """Quote stream response."""
+    return json_response("quote.json")
+
+
+def quote_response_multiple() -> HttpResponse:
+    """Quote stream response with multiple records."""
+    return json_response("quote_multiple.json")
+
+
+def quote_line_group_response() -> HttpResponse:
+    """Quote line group stream response."""
+    return json_response("quote_line_group.json")
+
+
+def site_migration_detail_response() -> HttpResponse:
+    """Site migration detail stream response."""
+    return json_response("site_migration_detail.json")
+
+
+def comment_response() -> HttpResponse:
+    """Comment stream response."""
+    return json_response("comment.json")
+
+
+def item_family_response() -> HttpResponse:
+    """Item family stream response."""
+    return json_response("item_family.json")
+
+
+def differential_price_response() -> HttpResponse:
+    """Differential price stream response."""
+    return json_response("differential_price.json")
+
+
+def error_no_scheduled_changes_response() -> HttpResponse:
+    """Response for 'No changes are scheduled for this subscription' error (IGNORE action)."""
+    return json_response("error_no_scheduled_changes.json", HTTPStatus.BAD_REQUEST)
@@ -0,0 +1,152 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from unittest import TestCase
+
+import freezegun
+
+from airbyte_cdk.models import SyncMode
+from airbyte_cdk.test.mock_http import HttpMocker
+from airbyte_cdk.test.state_builder import StateBuilder
+
+from .request_builder import RequestBuilder
+from .response_builder import addon_response, configuration_incompatible_response
+from .utils import config, read_output
+
+
+_STREAM_NAME = "addon"
+
+
+@freezegun.freeze_time("2024-01-15T12:00:00Z")
+class TestAddonStream(TestCase):
+    """Tests for the addon stream."""
+
+    @HttpMocker()
+    def test_read_records(self, http_mocker: HttpMocker) -> None:
+        """Basic read test for addon stream."""
+        http_mocker.get(
+            RequestBuilder.addons_endpoint().with_any_query_params().build(),
+            addon_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+        assert len(output.records) == 1
+        assert output.records[0].record.data["id"] == "addon_001"
+
+    @HttpMocker()
+    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
+        """Test that incremental sync emits state message."""
+        http_mocker.get(
+            RequestBuilder.addons_endpoint().with_any_query_params().build(),
+            addon_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
+
+        # Verify exactly 1 record returned
+        assert len(output.records) == 1
+
+        # Verify state message was emitted
+        assert len(output.state_messages) > 0
+
+        # Verify state contains correct cursor value
+        latest_state = output.state_messages[-1].state.stream.stream_state
+        latest_cursor_value = int(latest_state.__dict__["updated_at"])
+
+        # Check response file for the actual timestamp value!
+        assert latest_cursor_value == 1705312800  # From addon.json
+
+    @HttpMocker()
+    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
+        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
+        http_mocker.get(
+            RequestBuilder.addons_endpoint().with_any_query_params().build(),
+            addon_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        # Assert record exists
+        assert len(output.records) == 1
+        record_data = output.records[0].record.data
+
+        # Assert cf_ fields are REMOVED from top level
+        assert not any(
+            key.startswith("cf_") for key in record_data.keys()
+        ), "cf_ fields should be removed from record and moved to custom_fields array"
+
+        # Assert custom_fields array EXISTS
+        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
+        assert isinstance(record_data["custom_fields"], list)
+
+        # Assert custom_fields array contains the transformed fields
+        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
+
+        # Verify structure and values of custom_fields items
+        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
+        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
+
+    @HttpMocker()
+    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
+        """
+        Test incremental sync with prior state and validate request parameters.
+
+        This test validates:
+        1. State from previous sync is accepted
+        2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
+        3. State advances to latest record's cursor value
+        """
+        # ARRANGE: Previous state from last sync
+        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
+        state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()
+
+        # Mock API response with record AFTER the state timestamp
+        http_mocker.get(
+            RequestBuilder.addons_endpoint()
+            .with_sort_by_asc("updated_at")
+            .with_include_deleted("true")
+            .with_updated_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
+            .with_limit(100)
+            .build(),
+            addon_response(),
+        )
+
+        # ACT: Run incremental sync with state
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
+
+        # ASSERT: Records returned
+        assert len(output.records) == 1, "Should return exactly 1 record"
+        record = output.records[0].record.data
+
+        # ASSERT: Record data is correct
+        assert record["id"] == "addon_001"
+        assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"
+
+        # ASSERT: State message emitted
+        assert len(output.state_messages) > 0, "Should emit state messages"
+
+        # ASSERT: State advances to latest record
+        latest_state = output.state_messages[-1].state.stream.stream_state
+        latest_cursor_value = int(latest_state.__dict__["updated_at"])
+
+        # State should advance beyond previous state
+        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
+
+        # State should match the latest record's cursor value
+        assert (
+            latest_cursor_value == 1705312800
+        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
+
+    @HttpMocker()
+    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
+        """Test configuration_incompatible error is ignored for addon stream as configured in manifest."""
+        http_mocker.get(
+            RequestBuilder.addons_endpoint().with_any_query_params().build(),
+            configuration_incompatible_response(),
+        )
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        # Verify no records returned (error was ignored)
+        assert len(output.records) == 0
+
+        # Verify error message from manifest is logged
+        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
@@ -0,0 +1,120 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from unittest import TestCase
+
+import freezegun
+
+from airbyte_cdk.models import SyncMode
+from airbyte_cdk.test.mock_http import HttpMocker
+from airbyte_cdk.test.state_builder import StateBuilder
+
+from .request_builder import RequestBuilder
+from .response_builder import (
+    attached_item_response,
+    configuration_incompatible_response,
+    item_response,
+    item_response_multiple,
+)
+from .utils import config, read_output
+
+
+_STREAM_NAME = "attached_item"
+
+
+@freezegun.freeze_time("2024-01-15T12:00:00Z")
+class TestAttachedItemStream(TestCase):
+    """Tests for the attached_item stream (substream of item)."""
+
+    @HttpMocker()
+    def test_read_records(self, http_mocker: HttpMocker) -> None:
+        """Basic read test for attached_item stream (substream of item)."""
+        http_mocker.get(
+            RequestBuilder.items_endpoint().with_any_query_params().build(),
+            item_response(),
+        )
+        http_mocker.get(
+            RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
+            attached_item_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+        assert len(output.records) == 1
+        assert output.records[0].record.data["id"] == "attached_001"
+
+    @HttpMocker()
+    def test_with_multiple_parents(self, http_mocker: HttpMocker) -> None:
+        """Test attached_item substream with multiple parent items."""
+        http_mocker.get(
+            RequestBuilder.items_endpoint().with_any_query_params().build(),
+            item_response_multiple(),
+        )
+        http_mocker.get(
+            RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
+            attached_item_response(),
+        )
+        http_mocker.get(
+            RequestBuilder.item_attached_items_endpoint("item_002").with_any_query_params().build(),
+            attached_item_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+        assert len(output.records) == 2
+
+    @HttpMocker()
+    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
+        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
+        # Mock parent item stream
+        http_mocker.get(
+            RequestBuilder.items_endpoint().with_any_query_params().build(),
+            item_response(),
+        )
+
+        # Mock attached_item substream (with cf_ fields)
+        http_mocker.get(
+            RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
+            attached_item_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        assert len(output.records) == 1
+        record_data = output.records[0].record.data
+
+        # Assert cf_ fields are REMOVED from top level
+        assert not any(
+            key.startswith("cf_") for key in record_data.keys()
+        ), "cf_ fields should be removed from record and moved to custom_fields array"
+
+        # Assert custom_fields array EXISTS
+        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
+        assert isinstance(record_data["custom_fields"], list)
+
+        # Assert custom_fields array contains the transformed fields
+        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
+
+        # Verify structure and values of custom_fields items
+        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
+        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
+
+    @HttpMocker()
+    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
+        """Test configuration_incompatible error is ignored for attached_item stream as configured in manifest."""
+        # Mock parent stream (item) to return successfully
+        http_mocker.get(
+            RequestBuilder.items_endpoint().with_any_query_params().build(),
+            item_response(),
+        )
+
+        # Mock attached_item substream to return CONFIG_INCOMPATIBLE
+        http_mocker.get(
+            RequestBuilder.item_attached_items_endpoint("item_001").with_any_query_params().build(),
+            configuration_incompatible_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        # Verify no records returned (error was ignored)
+        assert len(output.records) == 0
+
+        # Verify error message from manifest is logged
+        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
@@ -0,0 +1,152 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from unittest import TestCase
+
+import freezegun
+
+from airbyte_cdk.models import SyncMode
+from airbyte_cdk.test.mock_http import HttpMocker
+from airbyte_cdk.test.state_builder import StateBuilder
+
+from .request_builder import RequestBuilder
+from .response_builder import comment_response, configuration_incompatible_response
+from .utils import config, read_output
+
+
+_STREAM_NAME = "comment"
+
+
+@freezegun.freeze_time("2024-01-15T12:00:00Z")
+class TestCommentStream(TestCase):
+    """Tests for the comment stream."""
+
+    @HttpMocker()
+    def test_read_records(self, http_mocker: HttpMocker) -> None:
+        """Basic read test for comment stream."""
+        http_mocker.get(
+            RequestBuilder.comments_endpoint().with_any_query_params().build(),
+            comment_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+        assert len(output.records) == 1
+        assert output.records[0].record.data["id"] == "comment_001"
+
+    @HttpMocker()
+    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
+        """Test that incremental sync emits state message."""
+        http_mocker.get(
+            RequestBuilder.comments_endpoint().with_any_query_params().build(),
+            comment_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)
+
+        # Verify exactly 1 record returned
+        assert len(output.records) == 1
+
+        # Verify state message was emitted
+        assert len(output.state_messages) > 0
+
+        # Verify state contains correct cursor value (comment uses created_at)
+        latest_state = output.state_messages[-1].state.stream.stream_state
+        latest_cursor_value = int(latest_state.__dict__["created_at"])
+
+        # Check response file for the actual timestamp value!
+        assert latest_cursor_value == 1705312800  # From comment.json
+
+    @HttpMocker()
+    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
+        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
+        http_mocker.get(
+            RequestBuilder.comments_endpoint().with_any_query_params().build(),
+            comment_response(),
+        )
+
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        # Assert record exists
+        assert len(output.records) == 1
+        record_data = output.records[0].record.data
+
+        # Assert cf_ fields are REMOVED from top level
+        assert not any(
+            key.startswith("cf_") for key in record_data.keys()
+        ), "cf_ fields should be removed from record and moved to custom_fields array"
+
+        # Assert custom_fields array EXISTS
+        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
+        assert isinstance(record_data["custom_fields"], list)
+
+        # Assert custom_fields array contains the transformed fields
+        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"
+
+        # Verify structure and values of custom_fields items
+        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
+        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"
+
+    @HttpMocker()
+    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
+        """
+        Test incremental sync with prior state and validate request parameters.
+
+        This test validates:
+        1. State from previous sync is accepted
+        2. Correct request parameters are sent (sort_by[asc]=created_at, created_at[between])
+        3. State advances to latest record's cursor value
+
+        Note: comment stream uses created_at cursor (not updated_at) and has NO include_deleted.
+        """
+        # ARRANGE: Previous state from last sync
+        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
+        state = StateBuilder().with_stream_state(_STREAM_NAME, {"created_at": previous_state_timestamp}).build()
+
+        # Mock API response with record AFTER the state timestamp
+        http_mocker.get(
+            RequestBuilder.comments_endpoint()
+            .with_sort_by_asc("created_at")
+            .with_created_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
+            .with_limit(100)
+            .build(),
+            comment_response(),
+        )
+
+        # ACT: Run incremental sync with state
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)
+
+        # ASSERT: Records returned
+        assert len(output.records) == 1, "Should return exactly 1 record"
+        record = output.records[0].record.data
+
+        # ASSERT: Record data is correct
+        assert record["id"] == "comment_001"
+
+        # ASSERT: State message emitted
+        assert len(output.state_messages) > 0, "Should emit state messages"
+
+        # ASSERT: State advances to latest record
+        latest_state = output.state_messages[-1].state.stream.stream_state
+        latest_cursor_value = int(latest_state.__dict__["created_at"])
+
+        # State should advance beyond previous state
+        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"
+
+        # State should match the latest record's cursor value
+        assert (
+            latest_cursor_value == 1705312800
+        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
+
+    @HttpMocker()
+    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
+        """Test configuration_incompatible error is ignored for comment stream as configured in manifest."""
+        http_mocker.get(
+            RequestBuilder.comments_endpoint().with_any_query_params().build(),
+            configuration_incompatible_response(),
+        )
+        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
+
+        # Verify no records returned (error was ignored)
+        assert len(output.records) == 0
+
+        # Verify error message from manifest is logged
+        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
@@ -0,0 +1,121 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import (
    configuration_incompatible_response,
    contact_response,
    customer_response,
    customer_response_multiple,
)
from .utils import config, read_output


_STREAM_NAME = "contact"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestContactStream(TestCase):
    """Tests for the contact stream (substream of customer)."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for contact stream (substream of customer)."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
            contact_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "contact_001"

    @HttpMocker()
    def test_with_multiple_parents(self, http_mocker: HttpMocker) -> None:
        """Test contact substream with multiple parent customers."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response_multiple(),
        )
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
            contact_response(),
        )
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_002").with_any_query_params().build(),
            contact_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 2

    @HttpMocker()
    def test_both_transformations(self, http_mocker: HttpMocker) -> None:
        """
        Test that BOTH transformations work together:
        1. AddFields adds customer_id from parent stream slice
        2. CustomFieldTransformation converts cf_* fields to custom_fields array
        """
        # Mock parent customer stream
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )

        # Mock contact substream (with cf_ fields)
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
            contact_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # ========== Test Transformation #1: AddFields ==========
        assert "customer_id" in record_data, "AddFields transformation should add customer_id field"
        assert record_data["customer_id"] == "cust_001", "customer_id should match parent stream's id"

        # ========== Test Transformation #2: CustomFieldTransformation ==========
        assert not any(key.startswith("cf_") for key in record_data.keys()), "cf_ fields should be removed from top level"
        assert "custom_fields" in record_data
        assert isinstance(record_data["custom_fields"], list)
        assert len(record_data["custom_fields"]) == 2

        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored for contact stream as configured in manifest."""
        # Mock parent stream (customer) to return successfully
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )

        # Mock contact substream to return CONFIG_INCOMPATIBLE
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
            configuration_incompatible_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Verify no records returned (error was ignored)
        assert len(output.records) == 0

        # Verify error message from manifest is logged
        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
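An aside, not part of this diff: an illustrative sketch of the record shape the two-transformation assertions above expect. This is not the connector's actual AddFields or CustomFieldTransformation code, and the cf_ values are hypothetical.

# Minimal sketch of the expected output shape: cf_* keys become a custom_fields array,
# and the parent customer's id is stamped onto the record.
def transform_contact(record: dict, parent_customer_id: str) -> dict:
    custom_fields = [{"name": key, "value": value} for key, value in record.items() if key.startswith("cf_")]
    cleaned = {key: value for key, value in record.items() if not key.startswith("cf_")}
    return {**cleaned, "customer_id": parent_customer_id, "custom_fields": custom_fields}

raw = {"id": "contact_001", "cf_tier": "gold", "cf_region": "emea"}  # hypothetical cf_ values
transformed = transform_contact(raw, "cust_001")
assert transformed["customer_id"] == "cust_001"
assert "cf_tier" not in transformed and len(transformed["custom_fields"]) == 2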
@@ -0,0 +1,153 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, coupon_response
from .utils import config, read_output


_STREAM_NAME = "coupon"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCouponStream(TestCase):
    """Tests for the coupon stream."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for coupon stream."""
        http_mocker.get(
            RequestBuilder.coupons_endpoint().with_any_query_params().build(),
            coupon_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "coupon_001"

    @HttpMocker()
    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
        """Test that incremental sync emits state message."""
        http_mocker.get(
            RequestBuilder.coupons_endpoint().with_any_query_params().build(),
            coupon_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)

        # Verify exactly 1 record returned
        assert len(output.records) == 1

        # Verify state message was emitted
        assert len(output.state_messages) > 0

        # Verify state contains correct cursor value
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # Check response file for the actual timestamp value!
        assert latest_cursor_value == 1705312800  # From coupon.json

    @HttpMocker()
    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
        http_mocker.get(
            RequestBuilder.coupons_endpoint().with_any_query_params().build(),
            coupon_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Assert record exists
        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # Assert cf_ fields are REMOVED from top level
        assert not any(
            key.startswith("cf_") for key in record_data.keys()
        ), "cf_ fields should be removed from record and moved to custom_fields array"

        # Assert custom_fields array EXISTS
        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
        assert isinstance(record_data["custom_fields"], list)

        # Assert custom_fields array contains the transformed fields
        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"

        # Verify structure and values of custom_fields items
        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"

    @HttpMocker()
    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
        """
        Test incremental sync with prior state and validate request parameters.

        This test validates:
        1. State from previous sync is accepted
        2. Correct request parameters are sent (only updated_at[between] - NO sort_by or include_deleted)
        3. State advances to latest record's cursor value

        Note: coupon stream uses updated_at cursor but has NO sort_by or include_deleted parameters.
        """
        # ARRANGE: Previous state from last sync
        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
        state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()

        # Mock API response with record AFTER the state timestamp
        # Note: Coupon stream does NOT use sort_by or include_deleted
        http_mocker.get(
            RequestBuilder.coupons_endpoint()
            .with_updated_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
            .with_limit(100)
            .build(),
            coupon_response(),
        )

        # ACT: Run incremental sync with state
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)

        # ASSERT: Records returned
        assert len(output.records) == 1, "Should return exactly 1 record"
        record = output.records[0].record.data

        # ASSERT: Record data is correct
        assert record["id"] == "coupon_001"
        assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"

        # ASSERT: State message emitted
        assert len(output.state_messages) > 0, "Should emit state messages"

        # ASSERT: State advances to latest record
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # State should advance beyond previous state
        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"

        # State should match the latest record's cursor value
        assert (
            latest_cursor_value == 1705312800
        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored for coupon stream as configured in manifest."""
        http_mocker.get(
            RequestBuilder.coupons_endpoint().with_any_query_params().build(),
            configuration_incompatible_response(),
        )
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Verify no records returned (error was ignored)
        assert len(output.records) == 0

        # Verify error message from manifest is logged
        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
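An aside, not part of this diff: a hedged sketch of the filter window the mocked coupon request above pins down. The bracketed-parameter serialization is an assumption about how the suite's RequestBuilder encodes it; the real encoding lives in request_builder.py.

# Hypothetical serialization of the incremental window asserted above.
from urllib.parse import urlencode

window = {"updated_at[between]": "[1704067200,1705320000]", "limit": 100}  # [previous state, frozen now]
print(urlencode(window))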
@@ -0,0 +1,154 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, credit_note_response
from .utils import config, read_output


_STREAM_NAME = "credit_note"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCreditNoteStream(TestCase):
    """Tests for the credit_note stream."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for credit_note stream."""
        http_mocker.get(
            RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
            credit_note_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "cn_001"

    @HttpMocker()
    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
        """Test that incremental sync emits state message."""
        http_mocker.get(
            RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
            credit_note_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)

        # Verify exactly 1 record returned
        assert len(output.records) == 1

        # Verify state message was emitted
        assert len(output.state_messages) > 0

        # Verify state contains correct cursor value
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # Check response file for the actual timestamp value!
        assert latest_cursor_value == 1705312800  # From credit_note.json

    @HttpMocker()
    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
        http_mocker.get(
            RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
            credit_note_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Assert record exists
        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # Assert cf_ fields are REMOVED from top level
        assert not any(
            key.startswith("cf_") for key in record_data.keys()
        ), "cf_ fields should be removed from record and moved to custom_fields array"

        # Assert custom_fields array EXISTS
        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
        assert isinstance(record_data["custom_fields"], list)

        # Assert custom_fields array contains the transformed fields
        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"

        # Verify structure and values of custom_fields items
        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"

    @HttpMocker()
    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
        """
        Test incremental sync with prior state and validate request parameters.

        This test validates:
        1. State from previous sync is accepted
        2. Correct request parameters are sent (sort_by[asc]=date, include_deleted, updated_at[between])
        3. State advances to latest record's cursor value

        Note: credit_note stream uses updated_at cursor but sorts by "date" (not "updated_at").
        """
        # ARRANGE: Previous state from last sync
        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
        state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()

        # Mock API response with record AFTER the state timestamp
        http_mocker.get(
            RequestBuilder.credit_notes_endpoint()
            .with_sort_by_asc("date")
            .with_include_deleted("true")
            .with_updated_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
            .with_limit(100)
            .build(),
            credit_note_response(),
        )

        # ACT: Run incremental sync with state
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)

        # ASSERT: Records returned
        assert len(output.records) == 1, "Should return exactly 1 record"
        record = output.records[0].record.data

        # ASSERT: Record data is correct
        assert record["id"] == "cn_001"
        assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"

        # ASSERT: State message emitted
        assert len(output.state_messages) > 0, "Should emit state messages"

        # ASSERT: State advances to latest record
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # State should advance beyond previous state
        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"

        # State should match the latest record's cursor value
        assert (
            latest_cursor_value == 1705312800
        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored for credit_note stream as configured in manifest."""
        http_mocker.get(
            RequestBuilder.credit_notes_endpoint().with_any_query_params().build(),
            configuration_incompatible_response(),
        )
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Verify no records returned (error was ignored)
        assert len(output.records) == 0

        # Verify error message from manifest is logged
        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
@@ -0,0 +1,182 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import (
    customer_response,
    customer_response_page1,
    customer_response_page2,
)
from .utils import config, read_output


_STREAM_NAME = "customer"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestCustomerStream(TestCase):
    """Tests for the customer stream."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for customer stream."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "cust_001"

    @HttpMocker()
    def test_pagination_two_pages(self, http_mocker: HttpMocker) -> None:
        """
        Test pagination with 2 pages for customer stream.

        IMPORTANT: Verified in manifest.yaml - all 27 streams use identical pagination:
        - Type: DefaultPaginator
        - Strategy: CursorPagination with next_offset
        - Page Size: 100
        - Stop Condition: when response has no next_offset

        This single test validates pagination behavior for ALL 27 streams:

        Standard streams (23): addon, comment, coupon, credit_note, customer,
        differential_price, event, gift, hosted_page, invoice, item, item_family,
        item_price, order, payment_source, plan, promotional_credit, quote,
        site_migration_detail, subscription, transaction, unbilled_charge,
        virtual_bank_account

        Substreams (4): attached_item, contact, quote_line_group,
        subscription_with_scheduled_changes

        Test validates:
        1. Page 1 response includes next_offset -> connector fetches page 2
        2. Page 2 response has no next_offset -> pagination stops
        3. All records from both pages are returned (2 records total)
        """
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            [
                customer_response_page1(),
                customer_response_page2(),
            ],
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 2
        record_ids = [r.record.data["id"] for r in output.records]
        assert "cust_001" in record_ids
        assert "cust_002" in record_ids

    @HttpMocker()
    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
        """Test that incremental sync emits state message."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)

        # Verify exactly 1 record returned
        assert len(output.records) == 1

        # Verify state message was emitted
        assert len(output.state_messages) > 0

        # Verify state contains correct cursor value
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # Check response file for the actual timestamp value!
        assert latest_cursor_value == 1705312800  # From customer.json

    @HttpMocker()
    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Assert record exists
        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # Assert cf_ fields are REMOVED from top level
        assert not any(
            key.startswith("cf_") for key in record_data.keys()
        ), "cf_ fields should be removed from record and moved to custom_fields array"

        # Assert custom_fields array EXISTS
        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
        assert isinstance(record_data["custom_fields"], list)

        # Assert custom_fields array contains the transformed fields
        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"

        # Verify structure and values of custom_fields items
        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"

    @HttpMocker()
    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
        """
        Test incremental sync with prior state and validate request parameters.

        This test validates:
        1. State from previous sync is accepted
        2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
        3. State advances to latest record's cursor value
        """
        # ARRANGE: Previous state from last sync
        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
        state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()

        # Mock API response with record AFTER the state timestamp
        http_mocker.get(
            RequestBuilder.customers_endpoint()
            .with_sort_by_asc("updated_at")
            .with_include_deleted("true")
            .with_updated_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
            .with_limit(100)
            .build(),
            customer_response(),
        )

        # ACT: Run incremental sync with state
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)

        # ASSERT: Records returned
        assert len(output.records) == 1, "Should return exactly 1 record"
        record = output.records[0].record.data

        # ASSERT: Record data is correct
        assert record["id"] == "cust_001"
        assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"

        # ASSERT: State message emitted
        assert len(output.state_messages) > 0, "Should emit state messages"

        # ASSERT: State advances to latest record
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # State should advance beyond previous state
        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"

        # State should match the latest record's cursor value
        assert (
            latest_cursor_value == 1705312800
        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"
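An aside, not part of this diff: a minimal sketch of the stop condition described in the pagination docstring above. The list/next_offset envelope is an assumption about the shape the page fixtures mock (Chargebee-style list responses), not copied from them.

# Page 1 advertises a next_offset, page 2 does not, so pagination stops after two requests.
page1 = {"list": [{"customer": {"id": "cust_001"}}], "next_offset": "offset_abc"}  # hypothetical payloads
page2 = {"list": [{"customer": {"id": "cust_002"}}]}  # no next_offset -> stop

pages, record_ids = iter([page1, page2]), []
response = next(pages)
while True:
    record_ids.extend(entry["customer"]["id"] for entry in response["list"])
    if "next_offset" not in response:
        break
    response = next(pages)
assert record_ids == ["cust_001", "cust_002"]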
@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, differential_price_response
from .utils import config, read_output


_STREAM_NAME = "differential_price"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestDifferentialPriceStream(TestCase):
    """Tests for the differential_price stream."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for differential_price stream."""
        http_mocker.get(
            RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
            differential_price_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "dp_001"

    @HttpMocker()
    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
        """Test that incremental sync emits state message."""
        http_mocker.get(
            RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
            differential_price_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)

        # Verify exactly 1 record returned
        assert len(output.records) == 1

        # Verify state message was emitted
        assert len(output.state_messages) > 0

        # Verify state contains correct cursor value
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # Check response file for the actual timestamp value!
        assert latest_cursor_value == 1705312800  # From differential_price.json

    @HttpMocker()
    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
        http_mocker.get(
            RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
            differential_price_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Assert record exists
        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # Assert cf_ fields are REMOVED from top level
        assert not any(
            key.startswith("cf_") for key in record_data.keys()
        ), "cf_ fields should be removed from record and moved to custom_fields array"

        # Assert custom_fields array EXISTS
        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
        assert isinstance(record_data["custom_fields"], list)

        # Assert custom_fields array contains the transformed fields
        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"

        # Verify structure and values of custom_fields items
        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"

    @HttpMocker()
    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
        """
        Test incremental sync with prior state and validate request parameters.

        This test validates:
        1. State from previous sync is accepted
        2. Correct request parameters are sent (sort_by, include_deleted, updated_at[between])
        3. State advances to latest record's cursor value
        """
        # ARRANGE: Previous state from last sync
        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
        state = StateBuilder().with_stream_state(_STREAM_NAME, {"updated_at": previous_state_timestamp}).build()

        # Mock API response with record AFTER the state timestamp
        http_mocker.get(
            RequestBuilder.differential_prices_endpoint()
            .with_sort_by_asc("updated_at")
            .with_include_deleted("true")
            .with_updated_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
            .with_limit(100)
            .build(),
            differential_price_response(),
        )

        # ACT: Run incremental sync with state
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)

        # ASSERT: Records returned
        assert len(output.records) == 1, "Should return exactly 1 record"
        record = output.records[0].record.data

        # ASSERT: Record data is correct
        assert record["id"] == "dp_001"
        assert record["updated_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"

        # ASSERT: State message emitted
        assert len(output.state_messages) > 0, "Should emit state messages"

        # ASSERT: State advances to latest record
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["updated_at"])

        # State should advance beyond previous state
        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"

        # State should match the latest record's cursor value
        assert (
            latest_cursor_value == 1705312800
        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored for differential_price stream as configured in manifest."""
        http_mocker.get(
            RequestBuilder.differential_prices_endpoint().with_any_query_params().build(),
            configuration_incompatible_response(),
        )
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Verify no records returned (error was ignored)
        assert len(output.records) == 0

        # Verify error message from manifest is logged
        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
@@ -0,0 +1,64 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from http import HTTPStatus
from unittest import TestCase

import freezegun

from airbyte_cdk.test.mock_http import HttpMocker

from .request_builder import RequestBuilder
from .response_builder import (
    configuration_incompatible_response,
    customer_response,
    empty_response,
    error_response,
)
from .utils import config, read_output


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestErrorHandling(TestCase):
    """Tests for error handling."""

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored as configured in manifest."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            configuration_incompatible_response(),
        )

        output = read_output(config_builder=config(), stream_name="customer")
        assert len(output.records) == 0

    @HttpMocker()
    def test_contact_404_ignored(self, http_mocker: HttpMocker) -> None:
        """Test 404 error is ignored for contact stream as configured in manifest."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            customer_response(),
        )
        http_mocker.get(
            RequestBuilder.customer_contacts_endpoint("cust_001").with_any_query_params().build(),
            error_response(HTTPStatus.NOT_FOUND),
        )

        output = read_output(config_builder=config(), stream_name="contact")
        assert len(output.records) == 0


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestEmptyResponse(TestCase):
    """Tests for empty response handling."""

    @HttpMocker()
    def test_empty_response(self, http_mocker: HttpMocker) -> None:
        """Test handling of empty response."""
        http_mocker.get(
            RequestBuilder.customers_endpoint().with_any_query_params().build(),
            empty_response(),
        )

        output = read_output(config_builder=config(), stream_name="customer")
        assert len(output.records) == 0
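An aside, not part of this diff: a hedged guess at the kind of body configuration_incompatible_response() mocks. The Chargebee-style api_error_code field and the HTTP status are assumptions; the real fixture is defined in response_builder.py and may differ. Per the manifest, this error is ignored and the "Stream is available only for Product Catalog 1.0" message is logged, which is the log line the tests assert on.

# Hypothetical error payload; the actual mocked body and status come from response_builder.py.
configuration_incompatible_body = {
    "api_error_code": "configuration_incompatible",
    "message": "<error description from the API>",
}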
@@ -0,0 +1,152 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.

from unittest import TestCase

import freezegun

from airbyte_cdk.models import SyncMode
from airbyte_cdk.test.mock_http import HttpMocker
from airbyte_cdk.test.state_builder import StateBuilder

from .request_builder import RequestBuilder
from .response_builder import configuration_incompatible_response, event_response
from .utils import config, read_output


_STREAM_NAME = "event"


@freezegun.freeze_time("2024-01-15T12:00:00Z")
class TestEventStream(TestCase):
    """Tests for the event stream."""

    @HttpMocker()
    def test_read_records(self, http_mocker: HttpMocker) -> None:
        """Basic read test for event stream."""
        http_mocker.get(
            RequestBuilder.events_endpoint().with_any_query_params().build(),
            event_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)
        assert len(output.records) == 1
        assert output.records[0].record.data["id"] == "ev_001"

    @HttpMocker()
    def test_incremental_emits_state(self, http_mocker: HttpMocker) -> None:
        """Test that incremental sync emits state message."""
        http_mocker.get(
            RequestBuilder.events_endpoint().with_any_query_params().build(),
            event_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental)

        # Verify exactly 1 record returned
        assert len(output.records) == 1

        # Verify state message was emitted
        assert len(output.state_messages) > 0

        # Verify state contains correct cursor value (event uses occurred_at)
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["occurred_at"])

        # Check response file for the actual timestamp value!
        assert latest_cursor_value == 1705312800  # From event.json

    @HttpMocker()
    def test_transformation_custom_fields(self, http_mocker: HttpMocker) -> None:
        """Test that CustomFieldTransformation converts cf_* fields to custom_fields array."""
        http_mocker.get(
            RequestBuilder.events_endpoint().with_any_query_params().build(),
            event_response(),
        )

        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Assert record exists
        assert len(output.records) == 1
        record_data = output.records[0].record.data

        # Assert cf_ fields are REMOVED from top level
        assert not any(
            key.startswith("cf_") for key in record_data.keys()
        ), "cf_ fields should be removed from record and moved to custom_fields array"

        # Assert custom_fields array EXISTS
        assert "custom_fields" in record_data, "custom_fields array should be created by CustomFieldTransformation"
        assert isinstance(record_data["custom_fields"], list)

        # Assert custom_fields array contains the transformed fields
        assert len(record_data["custom_fields"]) == 2, "custom_fields array should contain 2 transformed fields"

        # Verify structure and values of custom_fields items
        custom_fields = {cf["name"]: cf["value"] for cf in record_data["custom_fields"]}
        assert len(custom_fields) == 2, "Should have exactly 2 custom fields"

    @HttpMocker()
    def test_incremental_sync_with_state_and_params(self, http_mocker: HttpMocker) -> None:
        """
        Test incremental sync with prior state and validate request parameters.

        This test validates:
        1. State from previous sync is accepted
        2. Correct request parameters are sent (sort_by[asc]=occurred_at and occurred_at[between] - NO include_deleted for event stream)
        3. State advances to latest record's cursor value
        """
        # ARRANGE: Previous state from last sync
        previous_state_timestamp = 1704067200  # 2024-01-01T00:00:00
        state = StateBuilder().with_stream_state(_STREAM_NAME, {"occurred_at": previous_state_timestamp}).build()

        # Mock API response with record AFTER the state timestamp
        # Note: Event stream uses sort_by[asc]=occurred_at and occurred_at[between], but NO include_deleted
        http_mocker.get(
            RequestBuilder.events_endpoint()
            .with_sort_by_asc("occurred_at")
            .with_occurred_at_between(previous_state_timestamp, 1705320000)  # Frozen time: 2024-01-15T12:00:00Z
            .with_limit(100)
            .build(),
            event_response(),
        )

        # ACT: Run incremental sync with state
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME, sync_mode=SyncMode.incremental, state=state)

        # ASSERT: Records returned
        assert len(output.records) == 1, "Should return exactly 1 record"
        record = output.records[0].record.data

        # ASSERT: Record data is correct
        assert record["id"] == "ev_001"
        assert record["occurred_at"] >= previous_state_timestamp, "Record should be from after the state timestamp"

        # ASSERT: State message emitted
        assert len(output.state_messages) > 0, "Should emit state messages"

        # ASSERT: State advances to latest record
        latest_state = output.state_messages[-1].state.stream.stream_state
        latest_cursor_value = int(latest_state.__dict__["occurred_at"])

        # State should advance beyond previous state
        assert latest_cursor_value > previous_state_timestamp, f"State should advance: {latest_cursor_value} > {previous_state_timestamp}"

        # State should match the latest record's cursor value
        assert (
            latest_cursor_value == 1705312800
        ), f"State should be latest record's cursor value: expected 1705312800, got {latest_cursor_value}"

    @HttpMocker()
    def test_error_configuration_incompatible_ignored(self, http_mocker: HttpMocker) -> None:
        """Test configuration_incompatible error is ignored for event stream as configured in manifest."""
        http_mocker.get(
            RequestBuilder.events_endpoint().with_any_query_params().build(),
            configuration_incompatible_response(),
        )
        output = read_output(config_builder=config(), stream_name=_STREAM_NAME)

        # Verify no records returned (error was ignored)
        assert len(output.records) == 0

        # Verify error message from manifest is logged
        assert output.is_in_logs("Stream is available only for Product Catalog 1.0")
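An aside, not part of this diff: a recap, drawn from the test docstrings and mocked RequestBuilder chains above, of the incremental request parameters each stream's test pins down (limit=100 in every case). The comment stream's sort/filter flags are not visible in this excerpt, so it is omitted.

# Per-stream incremental request parameters asserted by the tests above.
INCREMENTAL_REQUEST_PARAMS = {
    "coupon": {"cursor": "updated_at", "sort_by[asc]": None, "include_deleted": False},
    "credit_note": {"cursor": "updated_at", "sort_by[asc]": "date", "include_deleted": True},
    "customer": {"cursor": "updated_at", "sort_by[asc]": "updated_at", "include_deleted": True},
    "differential_price": {"cursor": "updated_at", "sort_by[asc]": "updated_at", "include_deleted": True},
    "event": {"cursor": "occurred_at", "sort_by[asc]": "occurred_at", "include_deleted": False},
}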
Some files were not shown because too many files have changed in this diff.