Merge branch 'master' into agarctfi/source-google-drive/fix-excel-date-out-of-range
3  .github/pr-welcome-community.md  vendored

@@ -18,7 +18,10 @@ As needed or by request, Airbyte Maintainers can execute the following slash com

- `/bump-version` - Bumps connector versions.
- `/run-connector-tests` - Runs connector tests.
- `/run-cat-tests` - Runs CAT tests.
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.

If you have any questions, feel free to ask in the PR comments or join our [Slack community](https://airbytehq.slack.com/).
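For illustration, a maintainer can also post one of these slash commands from the command line instead of the PR comment box. This is a minimal sketch using the GitHub CLI, assuming the slash-command dispatcher listens to PR comments; the PR number is a placeholder:

```bash
# Post a slash command as a PR comment (illustrative PR number)
gh pr comment 12345 --repo airbytehq/airbyte --body "/run-connector-tests"
```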
10  .github/pr-welcome-internal.md  vendored

@@ -21,8 +21,18 @@ Airbyte Maintainers (that's you!) can execute the following slash commands on yo

- `/bump-version` - Bumps connector versions.
  - You can specify a custom changelog by passing `changelog`. Example: `/bump-version changelog="My cool update"`
  - Leaving the changelog arg blank will auto-populate the changelog from the PR title.
- `/bump-progressive-rollout-version` - Bumps connector version with an RC suffix for progressive rollouts.
  - Creates a release candidate version (e.g., `2.16.10-rc.1`) with `enableProgressiveRollout: true`
  - Example: `/bump-progressive-rollout-version changelog="Add new feature for progressive rollout"`
- `/run-cat-tests` - Runs legacy CAT tests (Connector Acceptance Tests)
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes pre-release connector builds (tagged as `{version}-dev.{git-sha}`) for all modified connectors in the PR.
- Connector release lifecycle (AI-powered):
  - `/ai-prove-fix` - Runs prerelease readiness checks, including testing against customer connections.
  - `/ai-canary-prerelease` - Rolls out prerelease to 5-10 connections for canary testing.
  - `/ai-release-watch` - Monitors rollout post-release and tracks sync success rates.
- JVM connectors:
  - `/update-connector-cdk-version connector=<CONNECTOR_NAME>` - Updates the specified connector to the latest CDK version.
    Example: `/update-connector-cdk-version connector=destination-bigquery`
34  .github/prompts/regression-evaluation.prompt.yaml  vendored  Normal file

@@ -0,0 +1,34 @@

name: Regression Report Evaluation
description: Evaluate Airbyte connector regression test reports and return a JSON verdict with reasoning
model: llama3.2:3b
modelParameters:
  temperature: 0.3
messages:
  - role: system
    content: |
      You are an expert at evaluating connector regression test results.
      Your task is to analyze the test report and determine if the regression tests should PASS or FAIL.

      Consider the following criteria:
      1. All test cases should pass (no failed tests)
      2. Record count differences between control and target versions should be minimal or explainable
      3. Message count differences should not indicate data loss or corruption
      4. Stream coverage should be reasonable
      5. Any warnings or errors in test outputs should be evaluated for severity

      Provide your evaluation in the following JSON format:
      {
        "pass": true/false,
        "summary": "A concise 2-3 sentence summary of the evaluation",
        "reasoning": "Detailed reasoning for your pass/fail decision, including specific issues found",
        "severity": "critical/major/minor/none",
        "recommendations": "Any recommendations for addressing issues"
      }

      Be strict but fair in your evaluation. Minor differences are acceptable, but data loss,
      corruption, or test failures should result in a FAIL.
  - role: user
    content: |
      Report:

      {{report_text}}
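For illustration, a model reply for a clean run might look like the following. The values are invented for the example, not taken from a real report:

```json
{
  "pass": true,
  "summary": "All regression test cases passed and record counts match between the control and target versions. No indicators of data loss or corruption were found.",
  "reasoning": "Control and target reads produced identical stream coverage and record counts, and no warnings or errors appeared in the test outputs.",
  "severity": "none",
  "recommendations": "None."
}
```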
71  .github/workflows/ai-canary-prerelease-command.yml  vendored  Normal file

@@ -0,0 +1,71 @@

name: AI Canary Prerelease Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Canary Prerelease for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-canary-prerelease:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Canary Prerelease Started**
            >
            > Rolling out to 5-10 connections, watching results, and reporting findings.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Canary Prerelease
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!canary_prerelease"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "🐤 **AI Canary Prerelease session starting...** Rolling out to 5-10 connections, watching results, and reporting findings. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/canary_prerelease.md)"
          tags: |
            ai-oncall
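Because this (and the two sibling AI workflows below) are plain `workflow_dispatch` workflows, they can also be kicked off manually; a minimal sketch using the GitHub CLI, with placeholder input values:

```bash
# Manually dispatch the canary prerelease workflow (illustrative PR number and comment id)
gh workflow run ai-canary-prerelease-command.yml \
  --repo airbytehq/airbyte \
  -f pr=12345 \
  -f comment-id=98765
```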
71  .github/workflows/ai-prove-fix-command.yml  vendored  Normal file

@@ -0,0 +1,71 @@

name: AI Prove Fix Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Prove Fix for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-prove-fix:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Prove Fix Started**
            >
            > Running readiness checks and testing against customer connections.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Prove Fix
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!prove_fix"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "🔍 **AI Prove Fix session starting...** Running readiness checks and testing against customer connections. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/prove_fix.md)"
          tags: |
            ai-oncall
71  .github/workflows/ai-release-watch-command.yml  vendored  Normal file

@@ -0,0 +1,71 @@

name: AI Release Watch Command

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number (if triggered from a PR)"
        type: number
        required: false
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      repo:
        description: "Repo (passed by slash command dispatcher)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Git ref (passed by slash command dispatcher)"
        required: false

run-name: "AI Release Watch for PR #${{ github.event.inputs.pr }}"

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  ai-release-watch:
    runs-on: ubuntu-latest
    steps:
      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      - name: Checkout code
        uses: actions/checkout@v4

      - name: Authenticate as GitHub App
        uses: actions/create-github-app-token@v2
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte,oncall"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post start comment
        if: inputs.comment-id != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          body: |
            > **AI Release Watch Started**
            >
            > Monitoring rollout and tracking sync success rates.
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

      - name: Run AI Release Watch
        uses: aaronsteers/devin-action@main
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ inputs.pr }}
          playbook-macro: "!release_watch"
          devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
          github-token: ${{ steps.get-app-token.outputs.token }}
          start-message: "👁️ **AI Release Watch session starting...** Monitoring rollout and tracking sync success rates. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/release_watch.md)"
          tags: |
            ai-oncall
178  .github/workflows/bump-progressive-rollout-version-command.yml  vendored  Normal file

@@ -0,0 +1,178 @@

name: Bump connector version for progressive rollout

on:
  workflow_dispatch:
    inputs:
      pr:
        description: "Pull request number. This PR will be referenced in the changelog line."
        type: number
        required: false
      comment-id:
        description: "Optional. The comment-id of the slash command. Used to update the comment with the status."
        required: false

      type:
        description: "The type of bump to perform. One of 'major', 'minor', or 'patch'."
        required: false
        default: "patch"

      changelog:
        description: "Optional. The comment to add to the changelog. If not provided, the PR title will be used."
        required: false
        default: ""

      # These must be declared, but they are unused and ignored.
      # TODO: Infer 'repo' and 'gitref' from PR number on other workflows, so we can remove these.
      repo:
        description: "Repo (Ignored)"
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "Ref (Ignored)"
        required: false

run-name: "Bump connector version for progressive rollout in PR: #${{ github.event.inputs.pr }}"
concurrency:
  group: ${{ github.workflow }}-${{ github.event.inputs.pr }}
  # Cancel any previous runs on the same branch if they are still in progress
  cancel-in-progress: true

jobs:
  bump-progressive-rollout-version:
    name: "Bump version of connectors for progressive rollout in this PR"
    runs-on: ubuntu-24.04
    steps:
      - name: Get job variables
        id: job-vars
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        shell: bash
        run: |
          PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
          echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT
          echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT
          echo "pr_title=$(echo "$PR_JSON" | jq -r .title)" >> $GITHUB_OUTPUT
          echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT

      # NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
      # permissions to add commits to our main repo as well as forks. This will only work on
      # forks if the user installs the app into their fork. Until we document this as a clear
      # path, we will have to keep using the PAT.
      - name: Checkout Airbyte
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          repository: ${{ steps.job-vars.outputs.repo }}
          ref: ${{ steps.job-vars.outputs.branch }}
          fetch-depth: 1
          # Important that token is a PAT so that CI checks are triggered again.
          # Without this we would be forever waiting on required checks to pass.
          token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}

      - name: Append comment with job run link
        # If comment-id is not provided, this will create a new
        # comment with the job run link.
        id: first-comment-action
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        with:
          comment-id: ${{ github.event.inputs.comment-id }}
          issue-number: ${{ github.event.inputs.pr }}
          body: |

            > **Progressive Rollout Version Bump Started**
            >
            > This will bump the connector version with an RC suffix and enable progressive rollout.
            > [Check job output.][1]

            [1]: ${{ steps.job-vars.outputs.run-url }}

      - name: Log changelog source
        run: |
          if [ -n "${{ github.event.inputs.changelog }}" ]; then
            echo "Using user-provided changelog: ${{ github.event.inputs.changelog }}"
          else
            echo "Using PR title as changelog: ${{ steps.job-vars.outputs.pr_title }}"
          fi

      - name: Run airbyte-ci connectors --modified bump-version with --rc flag
        uses: ./.github/actions/run-airbyte-ci
        continue-on-error: true
        with:
          context: "manual"
          gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
          sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
          github_token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
          git_repo_url: https://github.com/${{ steps.job-vars.outputs.repo }}.git
          subcommand: |
            connectors --modified bump-version \
              ${{ github.event.inputs.type }} \
              "${{ github.event.inputs.changelog != '' && github.event.inputs.changelog || steps.job-vars.outputs.pr_title }}" \
              --pr-number ${{ github.event.inputs.pr }} \
              --rc

      # This is helpful in the case that we change a previously committed generated file to be ignored by git.
      - name: Remove any files that have been gitignored
        run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached

      # Check for changes in git
      - name: Check for changes
        id: git-diff
        run: |
          git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT
        shell: bash

      # Commit changes (if any)
      - name: Commit changes
        id: commit-step
        if: steps.git-diff.outputs.changes == 'true'
        run: |
          git config --global user.name "Octavia Squidington III"
          git config --global user.email "octavia-squidington-iii@users.noreply.github.com"
          git add .
          git commit -m "chore: bump-version for progressive rollout"
          echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT

      - name: Push changes to '(${{ steps.job-vars.outputs.repo }})'
        if: steps.git-diff.outputs.changes == 'true'
        run: |
          git remote add contributor https://github.com/${{ steps.job-vars.outputs.repo }}.git
          git push contributor HEAD:'${{ steps.job-vars.outputs.branch }}'

      - name: Append success comment
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: steps.git-diff.outputs.changes == 'true'
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: hooray
          body: |
            > **Progressive Rollout Version Bump: SUCCESS**
            >
            > The connector version has been bumped with an RC suffix (e.g., `X.Y.Z-rc.1`).
            > Changes applied successfully. (${{ steps.commit-step.outputs.sha }})
            >
            > **Next steps:**
            > 1. Merge this PR to publish the RC version
            > 2. Monitor the progressive rollout in production
            > 3. When ready to promote, use the `finalize_rollout` workflow with `action=promote`
            > 4. If issues arise, use `action=rollback` instead

      - name: Append success comment (no-op)
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: steps.git-diff.outputs.changes != 'true'
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: "-1"
          body: |
            > Job completed successfully (no changes detected).
            >
            > This might happen if:
            > - The connector already has an RC version
            > - No modified connectors were detected in this PR

      - name: Append failure comment
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        if: failure()
        with:
          comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
          reactions: confused
          body: |
            > Job failed. Check the [workflow logs](${{ steps.job-vars.outputs.run-url }}) for details.
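To make the templated `subcommand` above concrete, here is roughly what the rendered invocation looks like for a patch bump, assuming the `run-airbyte-ci` action ultimately calls the `airbyte-ci` CLI with this subcommand; the PR number and changelog text are placeholders:

```bash
# Rendered form of the templated subcommand (illustrative values)
airbyte-ci connectors --modified bump-version \
  patch \
  "Fix excel date out of range handling" \
  --pr-number 12345 \
  --rc
```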
173  .github/workflows/kotlin-bulk-cdk-dokka-publish.yml  vendored  Normal file

@@ -0,0 +1,173 @@

name: Kotlin Bulk CDK Docs

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - "airbyte-cdk/bulk/**"
  push:
    branches:
      - master
    paths:
      - "airbyte-cdk/bulk/**"
  workflow_dispatch:

# Concurrency group ensures only one deployment runs at a time
concurrency:
  group: kotlin-bulk-cdk-docs-${{ github.ref }}
  cancel-in-progress: false

jobs:
  detect-changes:
    name: Detect Kotlin Bulk CDK Changes
    runs-on: ubuntu-24.04
    steps:
      - name: Force 'changed=true' [Manual Trigger]
        id: set-changed
        if: github.event_name == 'workflow_dispatch'
        run: echo "changed=true" >> "$GITHUB_OUTPUT"

      - name: Checkout Repository [Push Trigger]
        if: github.event_name == 'push'
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Detect Changes
        # PR triggers will use API (don't require pre-checkout.)
        # Push triggers will require the checked-out code.
        id: detect-changes
        if: github.event_name != 'workflow_dispatch'
        uses: dorny/paths-filter@v3.0.2
        with:
          filters: |
            bulk-cdk:
              - 'airbyte-cdk/bulk/**'

    outputs:
      changed: ${{ steps.set-changed.outputs.changed || steps.detect-changes.outputs.bulk-cdk }}

  build-docs:
    name: Build Kotlin Bulk CDK Documentation
    runs-on: ubuntu-24.04
    needs: detect-changes
    # Build docs if changes detected OR if manually triggered via workflow_dispatch
    if: needs.detect-changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1
          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
          ref: ${{ github.head_ref || github.ref }}

      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          distribution: "zulu"
          java-version: "21"

      - name: Setup Gradle
        uses: gradle/gradle-build-action@v3
        with:
          gradle-version: wrapper

      - name: Generate Dokka Documentation
        run: |
          echo "📚 Generating Dokka documentation for Kotlin Bulk CDK..."
          ./gradlew :airbyte-cdk:bulk:dokkaHtmlMultiModule --no-daemon

          echo "✅ Documentation generated successfully"
          ls -la airbyte-cdk/bulk/build/dokka/htmlMultiModule/

      - name: Upload Documentation Artifact
        uses: actions/upload-artifact@v4
        with:
          name: kotlin-bulk-cdk-docs-${{ github.sha }}
          path: airbyte-cdk/bulk/build/dokka/htmlMultiModule/
          retention-days: 30

  vercel-deploy:
    name: Deploy Docs to Vercel ${{ github.ref == 'refs/heads/master' && '(Production)' || '(Preview)' }}
    needs: [detect-changes, build-docs]
    # Deploy for: non-fork PRs, master branch pushes, OR manual workflow_dispatch
    # Always require Vercel project to be configured
    if: >
      (needs.detect-changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch')
      && (
        github.event_name == 'push'
        || github.event.pull_request.head.repo.full_name == github.repository
        || github.event_name == 'workflow_dispatch'
      )
      && vars.VERCEL_KOTLIN_CDK_PROJECT_ID != ''
    runs-on: ubuntu-24.04
    environment:
      name: ${{ github.ref == 'refs/heads/master' && 'kotlin-cdk-docs' || 'kotlin-cdk-docs-preview' }}
      url: ${{ steps.deploy-vercel.outputs.preview-url }}
    env:
      VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
      VERCEL_PROJECT_ID: ${{ vars.VERCEL_KOTLIN_CDK_PROJECT_ID }}

    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - name: Download Documentation Artifact
        uses: actions/download-artifact@v4
        with:
          name: kotlin-bulk-cdk-docs-${{ github.sha }}
          path: docs-output/airbyte-cdk/bulk

      - name: Debug - Show artifact structure
        run: |
          echo "📂 Artifact structure:"
          ls -lah docs-output/airbyte-cdk/bulk
          echo ""
          echo "🔍 Looking for index.html:"
          find docs-output -type f -name "index.html" -print
          echo ""
          echo "✅ Verifying deployment path..."
          test -f docs-output/airbyte-cdk/bulk/index.html && echo "✅ index.html found at expected path" || echo "❌ index.html NOT found at expected path"

      - name: Debug - Deployment Mode
        run: |
          echo "Event: ${{ github.event_name }}"
          echo "Ref: ${{ github.ref }}"
          echo "Is Production: ${{ github.ref == 'refs/heads/master' }}"
          echo "Vercel Args: ${{ github.ref == 'refs/heads/master' && '--prod' || '(none - preview)' }}"

      - name: Deploy to Vercel
        id: deploy-vercel
        uses: amondnet/vercel-action@v41.1.4
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          vercel-token: ${{ secrets.VERCEL_TOKEN }}
          vercel-org-id: ${{ env.VERCEL_ORG_ID }}
          vercel-project-id: ${{ env.VERCEL_PROJECT_ID }}
          working-directory: docs-output
          vercel-args: ${{ github.ref == 'refs/heads/master' && '--prod' || '' }}

      - name: Authenticate as GitHub App
        if: github.event_name == 'pull_request'
        uses: actions/create-github-app-token@v2.0.6
        id: get-app-token
        with:
          owner: "airbytehq"
          repositories: "airbyte"
          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}

      - name: Post Custom Check with Preview URL
        if: github.event_name == 'pull_request'
        uses: LouisBrunner/checks-action@v2.0.0
        with:
          name: "Kotlin Bulk CDK Docs Preview"
          status: completed
          conclusion: success
          details_url: ${{ steps.deploy-vercel.outputs.preview-url }}
          token: ${{ steps.get-app-token.outputs.token }}
          output: |
            {"summary":"Documentation preview deployed successfully","text":"View the Kotlin Bulk CDK documentation at the preview URL"}
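The same documentation build can be reproduced locally before pushing; the Gradle task and output path are taken directly from the workflow above:

```bash
# Build the Kotlin Bulk CDK docs locally and inspect the generated site
./gradlew :airbyte-cdk:bulk:dokkaHtmlMultiModule --no-daemon
ls -la airbyte-cdk/bulk/build/dokka/htmlMultiModule/
```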
28  .github/workflows/label-community-prs.yml  vendored  Normal file

@@ -0,0 +1,28 @@

name: Label Community PRs

# This workflow automatically adds the "community" label to PRs from forks.
# This enables automatic tracking on the Community PRs project board.

on:
  pull_request_target:
    types:
      - opened
      - reopened

jobs:
  label-community-pr:
    name: Add "Community" Label to PR
    # Only run for PRs from forks
    if: github.event.pull_request.head.repo.fork == true
    runs-on: ubuntu-24.04
    permissions:
      issues: write
      pull-requests: write
    steps:
      - name: Add community label
        # This action uses GitHub's addLabels API, which is idempotent.
        # If the label already exists, the API call succeeds without error.
        uses: actions-ecosystem/action-add-labels@bd52874380e3909a1ac983768df6976535ece7f8 # v1.1.3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          labels: community
209  .github/workflows/publish-connectors-prerelease-command.yml  vendored  Normal file

@@ -0,0 +1,209 @@

name: Publish Connectors Pre-release
# This workflow publishes a pre-release connector build from a PR branch.
# It can be triggered via the /publish-connectors-prerelease slash command from PR comments,
# or via the MCP tool `publish_connector_to_airbyte_registry`.
#
# Pre-release versions are tagged with the format: {version}-dev.{10-char-git-sha}
# These versions are NOT eligible for semver auto-advancement but ARE available
# for version pinning via the scoped_configuration API.
#
# Usage:
#   /publish-connectors-prerelease                          # Auto-detects single modified connector
#   /publish-connectors-prerelease connector=source-github  # Explicit connector name
#
# If no connector is specified, the workflow auto-detects modified connectors.
# It will fail if 0 or 2+ connectors are modified (only single-connector publishing is supported).

on:
  workflow_dispatch:
    inputs:
      # Global static-arg inputs for slash commands
      repo:
        description: "The repository name"
        required: false
        default: "airbytehq/airbyte"
        type: string
      gitref:
        description: "The git reference (branch or tag)"
        required: false
        type: string
      comment-id:
        description: "The ID of the comment triggering the workflow"
        required: false
        type: number
      pr:
        description: "The pull request number, if applicable"
        required: false
        type: number
      connector:
        description: "Single connector name to publish (e.g., destination-pinecone). If not provided, auto-detects from PR changes (fails if 0 or 2+ connectors modified)."
        required: false
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.event.inputs.pr || github.run_id }}
  cancel-in-progress: false

jobs:
  init:
    name: Initialize Pre-release Publish
    runs-on: ubuntu-24.04
    outputs:
      run-url: ${{ steps.job-vars.outputs.run-url }}
      pr-number: ${{ steps.job-vars.outputs.pr-number }}
      comment-id: ${{ steps.append-start-comment.outputs.comment-id }}
      short-sha: ${{ steps.get-sha.outputs.short-sha }}
      connector-name: ${{ steps.resolve-connector.outputs.connector-name }}
      connector-version: ${{ steps.connector-version.outputs.connector-version }}
    steps:
      - name: Checkout to get commit SHA
        uses: actions/checkout@v4
        with:
          repository: ${{ inputs.repo || github.repository }}
          ref: ${{ inputs.gitref || '' }}
          fetch-depth: 0

      - name: Get short SHA
        id: get-sha
        run: |
          SHORT_SHA=$(git rev-parse --short=10 HEAD)
          echo "short-sha=$SHORT_SHA" >> $GITHUB_OUTPUT

      - name: Get job variables
        id: job-vars
        run: |
          echo "run-url=https://github.com/${{ github.repository }}/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
          echo "pr-number=${{ inputs.pr }}" >> $GITHUB_OUTPUT

      - name: Resolve connector name
        id: resolve-connector
        run: |
          set -euo pipefail
          if [[ -n "${{ inputs.connector }}" ]]; then
            echo "Connector explicitly provided: ${{ inputs.connector }}"
            echo "connector-name=${{ inputs.connector }}" >> "$GITHUB_OUTPUT"
            exit 0
          fi

          echo "No connector provided, detecting modified connectors..."
          MODIFIED_JSON=$(./poe-tasks/get-modified-connectors.sh --json)
          echo "Modified connectors JSON: $MODIFIED_JSON"

          CONNECTORS=$(echo "$MODIFIED_JSON" | jq -r '.connector | map(select(. != "")) | .[]')
          CONNECTOR_COUNT=$(echo "$MODIFIED_JSON" | jq -r '.connector | map(select(. != "")) | length')

          echo "Found $CONNECTOR_COUNT modified connector(s)"

          if [[ "$CONNECTOR_COUNT" -eq 0 ]]; then
            echo "::error::No modified connectors found in this PR. Please specify a connector name explicitly."
            exit 1
          elif [[ "$CONNECTOR_COUNT" -gt 1 ]]; then
            echo "::error::Multiple modified connectors found: $CONNECTORS. This workflow only supports publishing one connector at a time. Please specify a connector name explicitly."
            exit 1
          fi

          CONNECTOR_NAME=$(echo "$CONNECTORS" | head -n1)
          echo "Auto-detected single modified connector: $CONNECTOR_NAME"
          echo "connector-name=$CONNECTOR_NAME" >> "$GITHUB_OUTPUT"

      - name: Determine connector version
        id: connector-version
        run: |
          set -euo pipefail
          CONNECTOR_NAME="${{ steps.resolve-connector.outputs.connector-name }}"
          CONNECTOR_DIR="airbyte-integrations/connectors/$CONNECTOR_NAME"
          VERSION=""
          if [[ -f "$CONNECTOR_DIR/manifest.yaml" ]]; then
            VERSION=$(grep -E '^\s*version:' "$CONNECTOR_DIR/manifest.yaml" | head -n1 | awk '{print $2}' | tr -d '"')
          fi
          if [[ -z "$VERSION" ]] && [[ -f "$CONNECTOR_DIR/metadata.yaml" ]]; then
            VERSION=$(grep -E '^\s*dockerImageTag:' "$CONNECTOR_DIR/metadata.yaml" | head -n1 | awk '{print $2}' | tr -d '"')
          fi
          echo "connector-version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Append start comment
        id: append-start-comment
        if: inputs.comment-id != '' || inputs.pr != ''
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        with:
          comment-id: ${{ inputs.comment-id }}
          issue-number: ${{ steps.job-vars.outputs.pr-number }}
          reactions: "+1"
          body: |
            > **Pre-release Connector Publish Started**
            >
            > Publishing pre-release build for connector `${{ steps.resolve-connector.outputs.connector-name }}`.
            > Branch: `${{ inputs.gitref }}`
            >
            > Pre-release versions will be tagged as `{version}-dev.${{ steps.get-sha.outputs.short-sha }}`
            > and are available for version pinning via the scoped_configuration API.
            >
            > [View workflow run](${{ steps.job-vars.outputs.run-url }})

  publish:
    name: Publish Pre-release
    needs: [init]
    uses: ./.github/workflows/publish_connectors.yml
    with:
      connectors: ${{ format('--name={0}', needs.init.outputs.connector-name) }}
      release-type: pre-release
    secrets: inherit

  post-completion:
    name: Post Completion Status
    needs: [init, publish]
    runs-on: ubuntu-24.04
    if: always() && (inputs.comment-id != '' || inputs.pr != '')
    steps:
      - name: Determine publish status
        id: status
        run: |
          if [[ "${{ needs.publish.result }}" == "success" ]]; then
            echo "status_emoji=:white_check_mark:" >> $GITHUB_OUTPUT
            echo "status_text=SUCCESS" >> $GITHUB_OUTPUT
          elif [[ "${{ needs.publish.result }}" == "failure" ]]; then
            echo "status_emoji=:x:" >> $GITHUB_OUTPUT
            echo "status_text=FAILED" >> $GITHUB_OUTPUT
          elif [[ "${{ needs.publish.result }}" == "cancelled" ]]; then
            echo "status_emoji=:warning:" >> $GITHUB_OUTPUT
            echo "status_text=CANCELLED" >> $GITHUB_OUTPUT
          else
            echo "status_emoji=:grey_question:" >> $GITHUB_OUTPUT
            echo "status_text=UNKNOWN" >> $GITHUB_OUTPUT
          fi

      - name: Prepare message variables
        id: message-vars
        run: |
          CONNECTOR_NAME="${{ needs.init.outputs.connector-name }}"
          # Use the actual docker-image-tag from the publish workflow output
          DOCKER_TAG="${{ needs.publish.outputs.docker-image-tag }}"

          if [[ -z "$DOCKER_TAG" ]]; then
            echo "::error::docker-image-tag output is missing from publish workflow. This is unexpected."
            exit 1
          fi

          echo "connector_name=$CONNECTOR_NAME" >> $GITHUB_OUTPUT
          echo "docker_image=airbyte/$CONNECTOR_NAME" >> $GITHUB_OUTPUT
          echo "docker_tag=$DOCKER_TAG" >> $GITHUB_OUTPUT
          echo "dockerhub_url=https://hub.docker.com/layers/airbyte/$CONNECTOR_NAME/$DOCKER_TAG" >> $GITHUB_OUTPUT
          echo "oss_registry_url=https://connectors.airbyte.com/files/metadata/airbyte/$CONNECTOR_NAME/$DOCKER_TAG/oss.json" >> $GITHUB_OUTPUT
          echo "cloud_registry_url=https://connectors.airbyte.com/files/metadata/airbyte/$CONNECTOR_NAME/$DOCKER_TAG/cloud.json" >> $GITHUB_OUTPUT

      - name: Append completion comment
        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
        with:
          comment-id: ${{ needs.init.outputs.comment-id }}
          issue-number: ${{ needs.init.outputs.pr-number }}
          body: |
            > **Pre-release Publish: ${{ steps.status.outputs.status_text }}** ${{ steps.status.outputs.status_emoji }}
            >
            > **Docker image (pre-release):**
            > `${{ steps.message-vars.outputs.docker_image }}:${{ steps.message-vars.outputs.docker_tag }}`
            >
            > **Docker Hub:** ${{ steps.message-vars.outputs.dockerhub_url }}
            >
            > **Registry JSON:**
            > - [OSS Registry](${{ steps.message-vars.outputs.oss_registry_url }})
            > - [Cloud Registry](${{ steps.message-vars.outputs.cloud_registry_url }})
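As a concrete illustration of the tag format described in the header comment, the pre-release tag is the connector's current version plus a `-dev.` suffix built from the 10-character short SHA. A local sketch reusing the same commands as the workflow; the connector name and resulting values are examples only:

```bash
# Compose a pre-release tag the way the workflow describes (illustrative connector)
VERSION=$(grep -E '^\s*dockerImageTag:' airbyte-integrations/connectors/source-github/metadata.yaml | head -n1 | awk '{print $2}' | tr -d '"')
SHORT_SHA=$(git rev-parse --short=10 HEAD)
echo "airbyte/source-github:${VERSION}-dev.${SHORT_SHA}"   # e.g. airbyte/source-github:1.2.3-dev.abc1234def0
```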
6  .github/workflows/publish_connectors.yml  vendored

@@ -21,6 +21,10 @@ on:
        required: false
        default: false
        type: boolean
    outputs:
      docker-image-tag:
        description: "Docker image tag used when publishing. For single-connector callers only; multi-connector callers should not rely on this output."
        value: ${{ jobs.publish_connector_registry_entries.outputs.docker-image-tag }}
  workflow_dispatch:
    inputs:
      connectors:

@@ -250,6 +254,8 @@ jobs:
      max-parallel: 5
      # Allow all jobs to run, even if one fails
      fail-fast: false
    outputs:
      docker-image-tag: ${{ steps.connector-metadata.outputs.docker-image-tag }}
    steps:
      - name: Checkout Airbyte
        # v4
167  .github/workflows/regression_tests.yml  vendored

@@ -1,167 +0,0 @@

name: Connector Ops CI - Run Regression Tests

concurrency:
  # This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
  #
  # - github.head_ref is only defined on PR runs, it makes sure that the concurrency group is unique for pull requests
  #   ensuring that only one run per pull request is active at a time.
  #
  # - github.run_id is defined on all runs, it makes sure that the concurrency group is unique for workflow dispatches.
  #   This allows us to run multiple workflow dispatches in parallel.
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

on:
  workflow_dispatch:
    inputs:
      connector_name:
        description: Connector name (e.g. source-faker)
        required: true
      connection_id:
        description: ID of the connection to test; use "auto" to let the connection retriever choose a connection
        required: true
        default: auto
      pr_url:
        description: URL of the PR containing the code change
        required: true
      streams:
        description: Streams to include in regression tests
      should_read_with_state:
        description: Whether to run tests against the read command with state
        default: "true"
        type: boolean
      use_local_cdk:
        description: Use the local CDK when building the target connector
        default: "false"
        type: boolean
      disable_proxy:
        description: Disable proxy for requests
        default: "false"
        type: boolean
      connection_subset:
        description: The subset of connections to select from.
        required: true
        type: choice
        default: all
        options:
          - sandboxes
          - all
      control_version:
        description: The version to use as a control version. This is useful when the version defined in the cloud registry does not have a lot of usage (either because a progressive rollout is underway or because a new version has just been released).
        required: false
        type: string

jobs:
  regression_tests:
    name: Regression Tests
    runs-on: linux-24.04-large # Custom runner, defined in GitHub org settings
    timeout-minutes: 360 # 6 hours
    steps:
      - name: Install Python
        id: install_python
        uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
        with:
          python-version: "3.11"
          check-latest: true
          update-environment: true

      - name: Checkout Airbyte
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
      - name: Extract branch name [WORKFLOW DISPATCH]
        shell: bash
        if: github.event_name == 'workflow_dispatch'
        run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT
        id: extract_branch

      - name: Install Poetry
        id: install_poetry
        uses: snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a # v1.4.1
        with:
          version: 1.8.5

      - name: Make poetry venv in project
        id: poetry_venv
        run: poetry config virtualenvs.in-project true

      - name: Install Python packages
        id: install_python_packages
        working-directory: airbyte-ci/connectors/pipelines
        run: poetry install

      - name: Fetch last commit id from remote branch [WORKFLOW DISPATCH]
        if: github.event_name == 'workflow_dispatch'
        id: fetch_last_commit_id_wd
        run: echo "commit_id=$(git rev-parse origin/${{ steps.extract_branch.outputs.branch }})" >> $GITHUB_OUTPUT

      - name: Setup Stream Parameters
        if: github.event_name == 'workflow_dispatch'
        run: |
          if [ -z "${{ github.event.inputs.streams }}" ]; then
            echo "STREAM_PARAMS=" >> $GITHUB_ENV
          else
            STREAMS=$(echo "${{ github.event.inputs.streams }}" | sed 's/,/ --connector_live_tests.selected-streams=/g')
            echo "STREAM_PARAMS=--connector_live_tests.selected-streams=$STREAMS" >> $GITHUB_ENV
          fi

      - name: Setup Local CDK Flag
        if: github.event_name == 'workflow_dispatch'
        run: |
          if ${{ github.event.inputs.use_local_cdk }}; then
            echo "USE_LOCAL_CDK_FLAG=--use-local-cdk" >> $GITHUB_ENV
          else
            echo "USE_LOCAL_CDK_FLAG=" >> $GITHUB_ENV
          fi

      - name: Setup State Flag
        if: github.event_name == 'workflow_dispatch'
        run: |
          if ${{ github.event.inputs.should_read_with_state }}; then
            echo "READ_WITH_STATE_FLAG=--connector_live_tests.should-read-with-state" >> $GITHUB_ENV
          else
            echo "READ_WITH_STATE_FLAG=" >> $GITHUB_ENV
          fi

      - name: Setup Proxy Flag
        if: github.event_name == 'workflow_dispatch'
        run: |
          if ${{ github.event.inputs.disable_proxy }}; then
            echo "DISABLE_PROXY_FLAG=--connector_live_tests.disable-proxy" >> $GITHUB_ENV
          else
            echo "DISABLE_PROXY_FLAG=" >> $GITHUB_ENV
          fi

      - name: Setup Connection Subset Option
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV

      - name: Setup Control Version
        if: github.event_name == 'workflow_dispatch'
        run: |
          if [ -n "${{ github.event.inputs.control_version }}" ]; then
            echo "CONTROL_VERSION=--connector_live_tests.control-version=${{ github.event.inputs.control_version }}" >> $GITHUB_ENV
          else
            echo "CONTROL_VERSION=" >> $GITHUB_ENV
          fi

      # NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
      # permissions to add commits to our main repo as well as forks. This will only work on
      # forks if the user installs the app into their fork. Until we document this as a clear
      # path, we will have to keep using the PAT.
      - name: Run Regression Tests [WORKFLOW DISPATCH]
        if: github.event_name == 'workflow_dispatch' # TODO: consider using the matrix strategy (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs). See https://github.com/airbytehq/airbyte/pull/37659#discussion_r1583380234 for details.
        uses: ./.github/actions/run-airbyte-ci
        with:
          context: "manual"
          dagger_cloud_token: ${{ secrets.DAGGER_CLOUD_TOKEN_CACHE_3 }}
          docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
          docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
          gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
          gcp_integration_tester_credentials: ${{ secrets.GCLOUD_INTEGRATION_TESTER }}
          sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
          git_branch: ${{ steps.extract_branch.outputs.branch }}
          git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }}
          github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
          s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} --name ${{ github.event.inputs.connector_name }} test --only-step connector_live_tests --connector_live_tests.test-suite=regression --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url=${{ github.event.inputs.pr_url }} ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }} ${{ env.CONTROL_VERSION }} --global-status-check-context="Regression Tests" --global-status-check-description='Running regression tests'
5  .github/workflows/reviewdog.yml  vendored

@@ -32,9 +32,10 @@ jobs:
        with:
          fetch-depth: 0
      - uses: errata-ai/vale-action@d89dee975228ae261d22c15adcd03578634d429c # Pinned to V2.1.1
        continue-on-error: true # Always pass, even if reviewdog can't post annotations (e.g., fork PRs with read-only tokens)
        with:
          vale_flags: --config=docusaurus/vale-ci.ini --minAlertLevel=warning # CI-specific config that disables certain rules (see vale-ci.ini vs vale.ini)
          vale_flags: --config=docusaurus/vale.ini --minAlertLevel=warning # Use vale.ini with minAlertLevel overridden to warning for CI
          files: docs/ # Folder in which to lint
          filter_mode: added # Only lint things that have changed
          fail_on_error: false # Don't fail if the linter finds issues (compliance is optional)
          reporter: github-pr-review # Post as annotations on the Changed Files page
          reporter: local # Output to job logs only, no PR annotations or comments
@@ -1,4 +1,4 @@
name: Connector CI - Run Live Validation Tests
name: On-Demand Live Connector Validation Tests

concurrency:
  # This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.

@@ -14,17 +14,44 @@ concurrency:
on:
  workflow_dispatch:
    inputs:
      connector_name:
        description: Connector name (e.g. source-faker)
        required: true
      # Global static-arg inputs for slash commands
      repo:
        description: "The repository name. Optional. Defaults to 'airbytehq/airbyte'."
        required: false
        default: "airbytehq/airbyte"
        type: string
      gitref:
        description: "The git reference (branch or tag). Optional. Defaults to the default branch."
        required: false
        type: string
      comment-id:
        description: "The ID of the comment triggering the workflow. Optional."
        required: false
        type: number
      pr:
        description: "The pull request number, if applicable. Optional."
        required: false
        type: number

      # Workflow-specific inputs
      connector_filter:
        description: >
          Connector filter. Will be passed to the `airbyte-ci connectors` command.
          To select all modified connectors, use '--modified'. To select specific connectors,
          pass one or more `--name` args, e.g. '--name=source-faker --name=source-hardcoded-records'.
        default: "--modified"
      connection_id:
        description: ID of the connection to test; use "auto" to let the connection retriever choose a connection
        required: true
      pr_url:
        description: URL of the PR containing the code change
        required: true
        description: >
          Connection ID. ID of the connection to test; use "auto" to let the
          connection retriever choose a connection.
        default: "auto"
      streams:
        description: Streams to include in tests
        description: >
          (Optional) Streams. Which streams to include in tests.
          If not set, these will be chosen automatically.
        required: false
        default: ""
        type: string
      should_read_with_state:
        description: Whether to run tests against the read command with state
        default: "true"

@@ -37,13 +64,16 @@ on:
        description: Disable proxy for requests
        default: "false"
        type: boolean
      connection_subset:
        description: The subset of connections to select from.
        required: true
        type: choice
        options:
          - sandboxes
          - all

      # Workaround: GitHub currently supports a max of 10 inputs for workflow_dispatch events.
      # We need to consolidate some inputs to stay within this limit.
      # connection_subset:
      #   description: The subset of connections to select from.
      #   default: "sandboxes"
      #   type: choice
      #   options:
      #     - sandboxes
      #     - all

jobs:
  live_tests:

@@ -119,7 +149,10 @@ jobs:
      - name: Setup Connection Subset Option
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
          echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=sandboxes" >> $GITHUB_ENV
        # TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
        # run: |
        #   echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV

      # NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
      # permissions to add commits to our main repo as well as forks. This will only work on

@@ -141,4 +174,4 @@ jobs:
          github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
          s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} --name ${{ github.event.inputs.connector_name }} test --only-step connector_live_tests --connector_live_tests.test-suite=live --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url=${{ github.event.inputs.pr_url }} ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }}
          subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} ${{ inputs.connector_filter }} test --only-step connector_live_tests --connector_live_tests.test-suite=live --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url="https://github.com/airbytehq/airbyte/pull/${{ github.event.inputs.pr }}" ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }}
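To clarify the `streams` handling used by both the live and regression test workflows: the comma-separated input is turned into repeated `--connector_live_tests.selected-streams` flags by the `sed` substitution in the "Setup Stream Parameters" step. A quick sketch with made-up stream names:

```bash
# Input "users,issues" becomes two repeated flags (stream names are examples)
STREAMS=$(echo "users,issues" | sed 's/,/ --connector_live_tests.selected-streams=/g')
echo "--connector_live_tests.selected-streams=$STREAMS"
# -> --connector_live_tests.selected-streams=users --connector_live_tests.selected-streams=issues
```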
315
.github/workflows/run-regression-tests-command.yml
vendored
Normal file
315
.github/workflows/run-regression-tests-command.yml
vendored
Normal file
@@ -0,0 +1,315 @@
|
||||
name: On-Demand Connector Regression Tests
|
||||
|
||||
concurrency:
|
||||
# This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
|
||||
#
|
||||
# - github.head_ref is only defined on PR runs, it makes sure that the concurrency group is unique for pull requests
|
||||
# ensuring that only one run per pull request is active at a time.
|
||||
#
|
||||
# - github.run_id is defined on all runs, it makes sure that the concurrency group is unique for workflow dispatches.
|
||||
# This allows us to run multiple workflow dispatches in parallel.
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
# Global static-arg inputs for slash commands
|
||||
repo:
|
||||
description: "The repository name"
|
||||
required: false
|
||||
default: "airbytehq/airbyte"
|
||||
type: string
|
||||
gitref:
|
||||
description: "The git reference (branch or tag)"
|
||||
required: false
|
||||
type: string
|
||||
comment-id:
|
||||
description: "The ID of the comment triggering the workflow"
|
||||
required: false
|
||||
type: number
|
||||
pr:
|
||||
description: "The pull request number, if applicable"
|
||||
required: false
|
||||
type: number
|
||||
|
||||
# Workflow-specific inputs
|
||||
connector_filter:
|
||||
description: >
|
||||
Connector filter. Will be passed to the `airbyte-ci connectors` command.
|
||||
To select all modified connectors, use '--modified'. To select specific connectors,
|
||||
pass one or or more `--name` args, e.g. '--name=source-faker --name=source-hardcoded-records'.
|
||||
default: "--modified"
|
||||
connection_id:
|
||||
description: >
|
||||
Connection ID. ID of the connection to test; use "auto" to let the
|
||||
connection retriever choose a connection.
|
||||
default: "auto"
|
||||
streams:
|
||||
description: >
|
||||
(Optional) Streams. Which streams to include in tests.
|
||||
If not set, these will be chosen automatically.
|
||||
required: false
|
||||
default: ""
|
||||
type: string
|
||||
should_read_with_state:
|
||||
description: Whether to run tests against the read command with state
|
||||
default: "true"
|
||||
type: boolean
|
||||
use_local_cdk:
|
||||
description: Use the local CDK when building the target connector
|
||||
default: "false"
|
||||
type: boolean
|
||||
disable_proxy:
|
||||
description: Disable proxy for requests
|
||||
default: "false"
|
||||
type: boolean
|
||||
|
||||
# Workaround: GitHub currently supports a max of 10 inputs for workflow_dispatch events.
|
||||
# We need to consolidate some inputs to stay within this limit.
|
||||
# connection_subset:
|
||||
# description: The subset of connections to select from.
|
||||
# default: "sandboxes"
|
||||
# type: choice
|
||||
# options:
|
||||
# - sandboxes
|
||||
# - all
|
||||
# control_version:
|
||||
# description: The version to use as a control version. This is useful when the version defined in the cloud registry does not have a lot of usage (either because a progressive rollout is underway or because a new version has just been released).
|
||||
# required: false
|
||||
# type: string
|
||||
|
||||
jobs:
|
||||
regression_tests:
|
||||
name: Regression Tests
|
||||
runs-on: linux-24.04-large # Custom runner, defined in GitHub org settings
|
||||
timeout-minutes: 360 # 6 hours
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
steps:
|
||||
- name: Append start with run link
|
||||
id: pr-comment-id
|
||||
if: github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
|
||||
uses: peter-evans/create-or-update-comment@v4
|
||||
with:
|
||||
token: ${{ github.token }}
|
||||
issue-number: ${{ github.event.inputs.pr }}
|
||||
comment-id: ${{ github.event.inputs.comment-id }}
|
||||
edit-mode: append
|
||||
body: |
|
||||
> Starting regression tests (filter: `${{ github.event.inputs.connector_filter || '--modified' }}`)
|
||||
> Workflow run: [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
|
||||
|
||||
- name: Install Python
|
||||
id: install_python
|
||||
uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
|
||||
with:
|
||||
python-version: "3.11"
|
||||
check-latest: true
|
||||
update-environment: true
|
||||
|
||||
- name: Checkout Airbyte
|
||||
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
|
||||
- name: Extract branch name [WORKFLOW DISPATCH]
|
||||
shell: bash
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT
|
||||
id: extract_branch
|
||||
|
||||
- name: Install Poetry
|
||||
id: install_poetry
|
||||
uses: snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a # v1.4.1
|
||||
with:
|
||||
version: 1.8.5
|
||||
|
||||
- name: Make poetry venv in project
|
||||
id: poetry_venv
|
||||
run: poetry config virtualenvs.in-project true
|
||||
|
||||
- name: Install Python packages
|
||||
id: install_python_packages
|
||||
working-directory: airbyte-ci/connectors/pipelines
|
||||
run: poetry install
|
||||
|
||||
- name: Fetch last commit id from remote branch [WORKFLOW DISPATCH]
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
id: fetch_last_commit_id_wd
|
||||
run: echo "commit_id=$(git rev-parse origin/${{ steps.extract_branch.outputs.branch }})" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Setup Stream Parameters
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if [ -z "${{ github.event.inputs.streams }}" ]; then
|
||||
echo "STREAM_PARAMS=" >> $GITHUB_ENV
|
||||
else
|
||||
STREAMS=$(echo "${{ github.event.inputs.streams }}" | sed 's/,/ --connector_live_tests.selected-streams=/g')
|
||||
echo "STREAM_PARAMS=--connector_live_tests.selected-streams=$STREAMS" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Setup Local CDK Flag
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if ${{ github.event.inputs.use_local_cdk }}; then
|
||||
echo "USE_LOCAL_CDK_FLAG=--use-local-cdk" >> $GITHUB_ENV
|
||||
else
|
||||
echo "USE_LOCAL_CDK_FLAG=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Setup State Flag
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if ${{ github.event.inputs.should_read_with_state }}; then
|
||||
echo "READ_WITH_STATE_FLAG=--connector_live_tests.should-read-with-state" >> $GITHUB_ENV
|
||||
else
|
||||
echo "READ_WITH_STATE_FLAG=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Setup Proxy Flag
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if ${{ github.event.inputs.disable_proxy }}; then
|
||||
echo "DISABLE_PROXY_FLAG=--connector_live_tests.disable-proxy" >> $GITHUB_ENV
|
||||
else
|
||||
echo "DISABLE_PROXY_FLAG=" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Setup Connection Subset Option
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=sandboxes" >> $GITHUB_ENV
|
||||
# TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
|
||||
# run: |
|
||||
# echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
|
||||
|
||||
- name: Setup Control Version
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
echo "CONTROL_VERSION=" >> $GITHUB_ENV
|
||||
# TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
|
||||
# run: |
|
||||
# if [ -n "${{ github.event.inputs.control_version }}" ]; then
|
||||
# echo "CONTROL_VERSION=--connector_live_tests.control-version=${{ github.event.inputs.control_version }}" >> $GITHUB_ENV
|
||||
# else
|
||||
# echo "CONTROL_VERSION=" >> $GITHUB_ENV
|
||||
# fi
|
||||
|
||||
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
|
||||
# permissions to add commits to our main repo as well as forks. This will only work on
|
||||
# forks if the user installs the app into their fork. Until we document this as a clear
|
||||
# path, we will have to keep using the PAT.
|
||||
- name: Run Regression Tests [WORKFLOW DISPATCH]
|
||||
id: run-regression-tests
|
||||
if: github.event_name == 'workflow_dispatch' # TODO: consider using the matrix strategy (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs). See https://github.com/airbytehq/airbyte/pull/37659#discussion_r1583380234 for details.
|
||||
uses: ./.github/actions/run-airbyte-ci
|
||||
with:
|
||||
context: "manual"
|
||||
dagger_cloud_token: ${{ secrets.DAGGER_CLOUD_TOKEN_CACHE_3 }}
|
||||
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
|
||||
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
|
||||
gcp_integration_tester_credentials: ${{ secrets.GCLOUD_INTEGRATION_TESTER }}
|
||||
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
|
||||
git_branch: ${{ steps.extract_branch.outputs.branch }}
|
||||
          git_revision: ${{ steps.fetch_last_commit_id_wd.outputs.commit_id }}
          github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
          s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} ${{ inputs.connector_filter }} test --only-step connector_live_tests --connector_live_tests.test-suite=regression --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url="https://github.com/airbytehq/airbyte/pull/${{ github.event.inputs.pr }}" ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }} ${{ env.CONTROL_VERSION }} --global-status-check-context="Regression Tests" --global-status-check-description='Running regression tests'

      - name: Locate regression test report
        if: always() && github.event_name == 'workflow_dispatch'
        id: locate-report
        run: |
          # Find the most recent report.html file in /tmp/live_tests_artifacts/
          REPORT_PATH=$(find /tmp/live_tests_artifacts -name "report.html" -type f -printf '%T@ %p\n' 2>/dev/null | sort -n | tail -1 | cut -f2- -d" ")
          if [ -n "$REPORT_PATH" ]; then
            echo "report_path=$REPORT_PATH" >> "$GITHUB_OUTPUT"
            echo "Found report at: $REPORT_PATH"
          else
            echo "report_path=" >> "$GITHUB_OUTPUT"
            echo "No report.html found in /tmp/live_tests_artifacts/"
          fi

      - name: Upload regression test report
        if: always() && github.event_name == 'workflow_dispatch' && steps.locate-report.outputs.report_path != ''
        uses: actions/upload-artifact@v4
        with:
          name: regression-test-report
          path: ${{ steps.locate-report.outputs.report_path }}
          if-no-files-found: ignore

      - name: Append regression outcome
        if: always() && github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ github.token }}
          comment-id: ${{ steps.pr-comment-id.outputs.comment-id }}
          edit-mode: append
          body: |
            > Regression tests: ${{ steps.run-regression-tests.outcome == 'success' && '✅ PASSED' || steps.run-regression-tests.outcome == 'failure' && '❌ FAILED' || steps.run-regression-tests.outcome == 'cancelled' && '⚠️ CANCELLED' || steps.run-regression-tests.outcome == 'skipped' && '⏭️ SKIPPED' || '❓ UNKNOWN' }}
            > Report: ${{ steps.locate-report.outputs.report_path != '' && 'artifact `regression-test-report` available in the run' || 'not generated' }}

      - name: Install live-tests dependencies for LLM evaluation
        if: always() && github.event_name == 'workflow_dispatch'
        working-directory: airbyte-ci/connectors/live-tests
        run: poetry install

      - name: Install and Start Ollama
        if: always() && github.event_name == 'workflow_dispatch'
        run: |
          curl -fsSL https://ollama.com/install.sh | sh
          ollama serve &
          sleep 5
          ollama pull llama3.2:3b
          echo "Ollama server started and model pulled"

      - name: Evaluate Regression Test Report with LLM
        if: always() && github.event_name == 'workflow_dispatch' && steps.locate-report.outputs.report_path != ''
        id: llm-eval
        continue-on-error: true
        working-directory: airbyte-ci/connectors/live-tests
        env:
          OPENAI_API_KEY: ollama
          OPENAI_BASE_URL: http://127.0.0.1:11434/v1
          EVAL_MODEL: llama3.2:3b
        run: |
          set -u
          echo "ran=false" >> "$GITHUB_OUTPUT"
          echo "result=error" >> "$GITHUB_OUTPUT"

          REPORT_PATH="${{ steps.locate-report.outputs.report_path }}"

          if [ -z "$REPORT_PATH" ]; then
            echo "Error: No report path provided from locate-report step" >&2
            echo "## ⚠️ LLM Evaluation Skipped" >> "$GITHUB_STEP_SUMMARY"
            echo "No regression test report found. The tests may have failed to generate a report." >> "$GITHUB_STEP_SUMMARY"
            exit 1
          fi

          echo "Evaluating report at: $REPORT_PATH"

          # Run the evaluation script
          OUT_JSON="$RUNNER_TEMP/llm_eval.json"
          poetry run python src/live_tests/regression_tests/llm_evaluation/evaluate_report.py \
            --report-path "$REPORT_PATH" \
            --output-json "$OUT_JSON"

          # If we got here, the script exited 0 and produced a judgment
          PASS=$(jq -r '.evaluation.pass' "$OUT_JSON")
          if [ "$PASS" = "true" ]; then RES="pass"; else RES="fail"; fi
          echo "ran=true" >> "$GITHUB_OUTPUT"
          echo "result=$RES" >> "$GITHUB_OUTPUT"

      - name: Append LLM outcome
        if: always() && github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
        env:
          EVAL_MODEL: llama3.2:3b
        uses: peter-evans/create-or-update-comment@v4
        with:
          token: ${{ github.token }}
          comment-id: ${{ steps.pr-comment-id.outputs.comment-id }}
          edit-mode: append
          body: |
            > LLM Evaluation: ${{ steps.llm-eval.outputs.ran == 'true' && (steps.llm-eval.outputs.result == 'pass' && '✅ PASS' || steps.llm-eval.outputs.result == 'fail' && '❌ FAIL' || '⚠️ ERROR') || '⚠️ Did not run' }}${{ steps.llm-eval.outputs.ran == 'true' && format(' (model: {0})', env.EVAL_MODEL) || '' }}
7
.github/workflows/slash-commands.yml
vendored
7
.github/workflows/slash-commands.yml
vendored
@@ -35,16 +35,23 @@ jobs:
          issue-type: both

          commands: |
            ai-canary-prerelease
            ai-prove-fix
            ai-release-watch
            approve-regression-tests
            bump-bulk-cdk-version
            bump-progressive-rollout-version
            bump-version
            build-connector-images
            connector-performance
            format-fix
            poe
            publish-connectors-prerelease
            publish-java-cdk
            run-cat-tests
            run-connector-tests
            run-live-tests
            run-regression-tests
            test-performance
            update-connector-cdk-version

66
airbyte-cdk/bulk/CONTRIBUTING.md
Normal file
66
airbyte-cdk/bulk/CONTRIBUTING.md
Normal file
@@ -0,0 +1,66 @@
# Contributing to the Kotlin Bulk CDK

Thank you for your interest in contributing to the Airbyte Kotlin Bulk CDK!

## Prerequisites

- **JDK 21** (Java Development Kit) or higher
- **Gradle** (uses the wrapper, no separate installation needed)

### If you need to install Java

```bash
# Get sdkman (https://sdkman.io/)
curl -s "https://get.sdkman.io" | bash
source "$HOME/.sdkman/bin/sdkman-init.sh"

# Verify install
sdk version

# Show available versions
sdk list java | grep 21

# Install the latest and set as default
sdk install java 21.0.9-zulu
sdk default java 21.0.9-zulu
```

## Generating Documentation

The Kotlin Bulk CDK uses [Dokka](https://kotlinlang.org/docs/dokka-introduction.html) to generate API documentation from KDoc comments.

**Published Documentation**: The latest API documentation is available at https://airbyte-kotlin-cdk.vercel.app/

### Generate Documentation Locally

```bash
./gradlew :airbyte-cdk:bulk:docsGenerate
```

This generates HTML documentation in `airbyte-cdk/bulk/build/dokka/htmlMultiModule/`.

### View Generated Documentation

```bash
# macOS
open airbyte-cdk/bulk/build/dokka/htmlMultiModule/index.html

# Linux
xdg-open airbyte-cdk/bulk/build/dokka/htmlMultiModule/index.html
```

## Other Useful Commands

```bash
# Build all modules
./gradlew :airbyte-cdk:bulk:bulkCdkBuild

# Run tests
./gradlew :airbyte-cdk:bulk:test
```

## More Information

For architecture, publishing, development workflow, and other details, see the [README](README.md).

For general Airbyte contribution guidelines, see the [main contributing guide](../../docs/contributing-to-airbyte/README.md).
@@ -4,6 +4,9 @@ The Bulk CDK is the "new java CDK" that's currently incubating.
As the name suggests, its purpose is to help develop connectors which extract or load data in bulk.
The Bulk CDK is written in Kotlin and uses the Micronaut framework for dependency injection.

- **API Reference Docs**: [Kotlin CDK API Reference](https://airbyte-kotlin-cdk.vercel.app/)
- **Contributing**: See [CONTRIBUTING.md](CONTRIBUTING.md).

## Structure

The Bulk CDK consists of a _core_ and a bunch of _toolkits_.

@@ -9,6 +9,10 @@ import org.gradle.api.tasks.TaskAction
import org.gradle.api.tasks.options.Option
import org.w3c.dom.Document

plugins {
    id 'org.jetbrains.dokka' version '2.0.0'
}

final var versionFile = file("version.properties")

final var cdkVersion = {
@@ -22,6 +26,7 @@ allprojects {
    version = cdkVersion
    apply plugin: 'java-library'
    apply plugin: 'maven-publish'
    apply plugin: 'org.jetbrains.dokka'

    group 'io.airbyte.bulk-cdk'

@@ -79,6 +84,67 @@ allprojects {
    }
}

// Configure Dokka for all subprojects
subprojects {
    tasks.withType(org.jetbrains.dokka.gradle.DokkaTask.class) {
        dokkaSourceSets {
            configureEach {
                // Only document public APIs
                includeNonPublic.set(false)
                skipEmptyPackages.set(true)

                // Report undocumented members
                reportUndocumented.set(true)

                // Add external documentation links
                externalDocumentationLinks {
                    create("kotlin") {
                        url.set(uri("https://kotlinlang.org/api/latest/jvm/stdlib/").toURL())
                        packageListUrl.set(uri("https://kotlinlang.org/api/latest/jvm/stdlib/package-list").toURL())
                    }
                    create("kotlinx-coroutines") {
                        url.set(uri("https://kotlinlang.org/api/kotlinx.coroutines/").toURL())
                    }
                    create("micronaut") {
                        url.set(uri("https://docs.micronaut.io/latest/api/").toURL())
                    }
                }

                // Source links back to GitHub
                sourceLink {
                    localDirectory.set(file("src/main"))
                    remoteUrl.set(uri("https://github.com/airbytehq/airbyte/tree/master/airbyte-cdk/bulk/${project.name}/src/main").toURL())
                    remoteLineSuffix.set("#L")
                }
            }
        }
    }
}

// Configure the multi-module documentation task
tasks.named('dokkaHtmlMultiModule') {
    moduleName.set("Airbyte Kotlin Bulk CDK")
    outputDirectory.set(layout.buildDirectory.dir("dokka/htmlMultiModule"))
}

// Convenience task for local development
tasks.register('docsGenerate') {
    group = 'documentation'
    description = 'Generate Dokka documentation for all modules'
    dependsOn 'dokkaHtmlMultiModule'

    doLast {
        println "Documentation generated at: ${layout.buildDirectory.dir("dokka/htmlMultiModule").get()}"
    }
}

// Backwards-compatible alias
tasks.register('dokkaGenerate') {
    group = 'documentation'
    description = 'Generate Dokka documentation for all modules (alias for docsGenerate)'
    dependsOn 'docsGenerate'
}

tasks.register('checkBuildNumber') {
    description = "Check that the version doesn't exist"

@@ -1,3 +1,50 @@
## Version 0.1.88

**Load CDK**

* Add CDC_CURSOR_COLUMN_NAME constant.

## Version 0.1.87

**Load CDK**

* Properly call NamespaceMapper before calculating final table names.

## Version 0.1.86

**Load CDK**

* Adds toFinalSchema "escape hatch" for final table schema munging
* Refactored Component test fixtures to require explicit StreamTableSchema creation using TableSchemaFactory

## Version 0.1.85

**Extract CDK**

* Fix CDC partition reader race condition when draining records after debezium shutdown.

## Version 0.1.84

load cdk: Move most DB packages into core. Refactor table schema interface into TableSchemaMapper.

## Version 0.1.83

load cdk: more tests to help guide dependency injection dependency implementations

## Version 0.1.82

load cdk: components tests: more schema evolution testcases

## Version 0.1.81

load cdk: components tests: more coverage on upsert

## Version 0.1.80

**Extract CDK**

* Fix default partition_id value for `CheckpointOnlyPartitionReader`.

## Version 0.1.79

**Extract CDK**

@@ -13,6 +13,7 @@ kotlin {

dependencies {
    api("com.github.f4b6a3:uuid-creator:6.1.1")
    implementation 'commons-codec:commons-codec:1.16.0'

    implementation project(':airbyte-cdk:bulk:core:bulk-cdk-core-base')
    implementation 'org.apache.commons:commons-lang3:3.17.0'

@@ -165,6 +165,7 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
                minimumGenerationId = 0,
                syncId = 42,
                namespaceMapper = namespaceMapperForMedium(),
                tableSchema = emptyTableSchema,
            )
        val e =
            assertThrows<DestinationUncleanExitException> {
@@ -202,6 +203,7 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
                minimumGenerationId = 0,
                syncId = 42,
                namespaceMapper = namespaceMapperForMedium(),
                tableSchema = emptyTableSchema,
            )

        val returnedMessages =
@@ -324,7 +326,8 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
                        namespaceDefinitionType = namespaceMappingConfig.namespaceDefinitionType,
                        streamPrefix = namespaceMappingConfig.streamPrefix,
                        namespaceFormat = namespaceMappingConfig.namespaceFormat
                    )
                ),
            tableSchema = emptyTableSchema,
        )
        namespaceValidator(
            stream.unmappedNamespace,

@@ -12,6 +12,10 @@ import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.message.DestinationRecordStreamComplete
import io.airbyte.cdk.load.message.InputRecord
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.util.serializeToString
import io.airbyte.cdk.load.write.WriteOperation
import io.github.oshai.kotlinlogging.KotlinLogging
@@ -46,7 +50,18 @@ interface DestinationChecker<C : DestinationConfiguration> {
                generationId = 1,
                minimumGenerationId = 0,
                syncId = 1,
                namespaceMapper = NamespaceMapper()
                namespaceMapper = NamespaceMapper(),
                tableSchema =
                    StreamTableSchema(
                        tableNames = TableNames(finalTableName = TableName("testing", "test")),
                        columnSchema =
                            ColumnSchema(
                                inputSchema = mapOf(),
                                inputToFinalColumnNames = mapOf(),
                                finalSchema = mapOf(),
                            ),
                        importType = Append,
                    )
            )

    fun check(config: C)

@@ -10,10 +10,15 @@ import io.airbyte.cdk.load.config.CHECK_STREAM_NAMESPACE
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.schema.TableNameResolver
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Value
import io.micronaut.context.annotation.Requires
import jakarta.inject.Named
import jakarta.inject.Singleton
import java.time.LocalDate
@@ -91,45 +96,81 @@ data class DestinationCatalog(val streams: List<DestinationStream> = emptyList()
    }
}

interface DestinationCatalogFactory {
    fun make(): DestinationCatalog
}

@Factory
class DefaultDestinationCatalogFactory {
    @Requires(property = Operation.PROPERTY, notEquals = "check")
    @Singleton
    fun getDestinationCatalog(
    fun syncCatalog(
        catalog: ConfiguredAirbyteCatalog,
        streamFactory: DestinationStreamFactory,
        @Value("\${${Operation.PROPERTY}}") operation: String,
        tableNameResolver: TableNameResolver,
        namespaceMapper: NamespaceMapper,
    ): DestinationCatalog {
        // we resolve the table names with the properly mapped descriptors
        val mappedDescriptors =
            catalog.streams.map { namespaceMapper.map(it.stream.namespace, it.stream.name) }.toSet()
        val names = tableNameResolver.getTableNameMapping(mappedDescriptors)

        require(
            names.size == catalog.streams.size,
            { "Invariant violation: An incomplete table name mapping was generated." }
        )

        return DestinationCatalog(
            streams =
                catalog.streams.map {
                    val key = namespaceMapper.map(it.stream.namespace, it.stream.name)
                    streamFactory.make(it, names[key]!!)
                }
        )
    }

    /**
     * Warning: Most destinations do not use this.
     *
     * Catalog stub for running SYNC from within a CHECK operation.
     *
     * Used exclusively by the DefaultDestinationChecker.
     */
    @Requires(property = Operation.PROPERTY, value = "check")
    @Singleton
    fun checkCatalog(
        @Named("checkNamespace") checkNamespace: String?,
        namespaceMapper: NamespaceMapper
    ): DestinationCatalog {
        if (operation == "check") {
            // generate a string like "20240523"
            val date = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"))
            // generate 5 random characters
            val random = RandomStringUtils.insecure().nextAlphabetic(5).lowercase()
            val namespace = checkNamespace ?: "${CHECK_STREAM_NAMESPACE}_$date$random"
            return DestinationCatalog(
                listOf(
                    DestinationStream(
                        unmappedNamespace = namespace,
                        unmappedName = "test$date$random",
                        importType = Append,
                        schema =
                            ObjectType(
                                linkedMapOf("test" to FieldType(IntegerType, nullable = true))
                            ),
                        generationId = 1,
                        minimumGenerationId = 0,
                        syncId = 1,
                        namespaceMapper = namespaceMapper
                    )
        // generate a string like "20240523"
        val date = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"))
        // generate 5 random characters
        val random = RandomStringUtils.insecure().nextAlphabetic(5).lowercase()
        val namespace = checkNamespace ?: "${CHECK_STREAM_NAMESPACE}_$date$random"
        return DestinationCatalog(
            listOf(
                DestinationStream(
                    unmappedNamespace = namespace,
                    unmappedName = "test$date$random",
                    importType = Append,
                    schema =
                        ObjectType(linkedMapOf("test" to FieldType(IntegerType, nullable = true))),
                    generationId = 1,
                    minimumGenerationId = 0,
                    syncId = 1,
                    namespaceMapper = namespaceMapper,
                    tableSchema =
                        StreamTableSchema(
                            columnSchema =
                                ColumnSchema(
                                    inputSchema = mapOf(),
                                    inputToFinalColumnNames = mapOf(),
                                    finalSchema = mapOf()
                                ),
                            importType = Append,
                            tableNames =
                                TableNames(
                                    finalTableName = TableName("namespace", "test"),
                                ),
                        ),
                )
            )
        } else {
            return DestinationCatalog(streams = catalog.streams.map { streamFactory.make(it) })
        }
        )
    }
}

@@ -9,15 +9,17 @@ import io.airbyte.cdk.load.data.AirbyteValueProxy
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.collectUnknownPaths
import io.airbyte.cdk.load.data.json.AirbyteTypeToJsonSchema
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.message.DestinationRecord
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.StreamDescriptor
import jakarta.inject.Singleton
import io.github.oshai.kotlinlogging.KotlinLogging

private val log = KotlinLogging.logger {}

/**
 * Internal representation of destination streams. This is intended to be a case class specialized
@@ -64,7 +66,8 @@ data class DestinationStream(
    val includeFiles: Boolean = false,
    val destinationObjectName: String? = null,
    val matchingKey: List<String>? = null,
    private val namespaceMapper: NamespaceMapper
    private val namespaceMapper: NamespaceMapper,
    val tableSchema: StreamTableSchema,
) {
    val unmappedDescriptor = Descriptor(namespace = unmappedNamespace, name = unmappedName)
    val mappedDescriptor = namespaceMapper.map(namespace = unmappedNamespace, name = unmappedName)
@@ -181,58 +184,6 @@ fun AirbyteType.computeUnknownColumnChanges() =
        )
}

@Singleton
class DestinationStreamFactory(
    private val jsonSchemaToAirbyteType: JsonSchemaToAirbyteType,
    private val namespaceMapper: NamespaceMapper
) {
    fun make(stream: ConfiguredAirbyteStream): DestinationStream {
        return DestinationStream(
            unmappedNamespace = stream.stream.namespace,
            unmappedName = stream.stream.name,
            namespaceMapper = namespaceMapper,
            importType =
                when (stream.destinationSyncMode) {
                    null -> throw IllegalArgumentException("Destination sync mode was null")
                    DestinationSyncMode.APPEND -> Append
                    DestinationSyncMode.OVERWRITE -> Overwrite
                    DestinationSyncMode.APPEND_DEDUP ->
                        Dedupe(primaryKey = stream.primaryKey, cursor = stream.cursorField)
                    DestinationSyncMode.UPDATE -> Update
                    DestinationSyncMode.SOFT_DELETE -> SoftDelete
                },
            generationId = stream.generationId,
            minimumGenerationId = stream.minimumGenerationId,
            syncId = stream.syncId,
            schema = jsonSchemaToAirbyteType.convert(stream.stream.jsonSchema),
            isFileBased = stream.stream.isFileBased ?: false,
            includeFiles = stream.includeFiles ?: false,
            destinationObjectName = stream.destinationObjectName,
            matchingKey =
                stream.destinationObjectName?.let {
                    fromCompositeNestedKeyToCompositeKey(stream.primaryKey)
                }
        )
    }
}

private fun fromCompositeNestedKeyToCompositeKey(
    compositeNestedKey: List<List<String>>
): List<String> {
    if (compositeNestedKey.any { it.size > 1 }) {
        throw IllegalArgumentException(
            "Nested keys are not supported for matching keys. Key was $compositeNestedKey"
        )
    }
    if (compositeNestedKey.any { it.isEmpty() }) {
        throw IllegalArgumentException(
            "Parts of the composite key need to have at least one element. Key was $compositeNestedKey"
        )
    }

    return compositeNestedKey.map { it[0] }.toList()
}

sealed interface ImportType

data object Append : ImportType

@@ -0,0 +1,89 @@
/*
 * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.command

import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton

private val log = KotlinLogging.logger {}

@Singleton
class DestinationStreamFactory(
    private val jsonSchemaToAirbyteType: JsonSchemaToAirbyteType,
    private val namespaceMapper: NamespaceMapper,
    private val schemaFactory: TableSchemaFactory,
) {
    fun make(stream: ConfiguredAirbyteStream, resolvedTableName: TableName): DestinationStream {
        val airbyteSchemaType = jsonSchemaToAirbyteType.convert(stream.stream.jsonSchema)
        val airbyteSchema: Map<String, FieldType> =
            when (airbyteSchemaType) {
                is ObjectType -> airbyteSchemaType.properties
                is ObjectTypeWithEmptySchema,
                is ObjectTypeWithoutSchema -> emptyMap()
                else -> throw IllegalStateException("")
            }
        val importType =
            when (stream.destinationSyncMode) {
                null -> throw IllegalArgumentException("Destination sync mode was null")
                DestinationSyncMode.APPEND -> Append
                DestinationSyncMode.OVERWRITE -> Overwrite
                DestinationSyncMode.APPEND_DEDUP ->
                    Dedupe(primaryKey = stream.primaryKey, cursor = stream.cursorField)
                DestinationSyncMode.UPDATE -> Update
                DestinationSyncMode.SOFT_DELETE -> SoftDelete
            }
        val tableSchema =
            schemaFactory.make(
                resolvedTableName,
                airbyteSchema,
                importType,
            )

        return DestinationStream(
            unmappedNamespace = stream.stream.namespace,
            unmappedName = stream.stream.name,
            namespaceMapper = namespaceMapper,
            importType = importType,
            generationId = stream.generationId,
            minimumGenerationId = stream.minimumGenerationId,
            syncId = stream.syncId,
            schema = airbyteSchemaType,
            isFileBased = stream.stream.isFileBased ?: false,
            includeFiles = stream.includeFiles ?: false,
            destinationObjectName = stream.destinationObjectName,
            matchingKey =
                stream.destinationObjectName?.let {
                    fromCompositeNestedKeyToCompositeKey(stream.primaryKey)
                },
            tableSchema = tableSchema,
        )
    }

    private fun fromCompositeNestedKeyToCompositeKey(
        compositeNestedKey: List<List<String>>
    ): List<String> {
        if (compositeNestedKey.any { it.size > 1 }) {
            throw IllegalArgumentException(
                "Nested keys are not supported for matching keys. Key was $compositeNestedKey",
            )
        }
        if (compositeNestedKey.any { it.isEmpty() }) {
            throw IllegalArgumentException(
                "Parts of the composite key need to have at least one element. Key was $compositeNestedKey",
            )
        }

        return compositeNestedKey.map { it[0] }.toList()
    }
}
@@ -5,8 +5,8 @@
package io.airbyte.cdk.load.component

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName

/**
 * Client interface for database table operations.

@@ -5,8 +5,8 @@
package io.airbyte.cdk.load.component

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import kotlin.collections.component1
import kotlin.collections.component2
import kotlin.collections.contains

@@ -1,12 +0,0 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.dataflow.transform

import io.airbyte.cdk.load.command.DestinationStream

/** Used by the CDK to pass the final column name to the aggregate buffer. */
interface ColumnNameMapper {
    fun getMappedColumnName(stream: DestinationStream, columnName: String): String? = columnName
}
@@ -1,15 +0,0 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.dataflow.transform.defaults

import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton

/*
 * Default implementation of the ColumnNameMapper. If your destination needs destination-specific
 * column name mapping, create your own ColumnNameMapper implementation in your destination.
 */
@Singleton @Secondary class NoOpColumnNameMapper : ColumnNameMapper
@@ -5,14 +5,12 @@
package io.airbyte.cdk.load.dataflow.transform.medium

import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
import jakarta.inject.Singleton

@Singleton
class JsonConverter(
    private val columnNameMapper: ColumnNameMapper,
    private val coercer: ValueCoercer,
    private val validationResultHandler: ValidationResultHandler,
) : MediumConverter {
@@ -24,10 +22,7 @@ class JsonConverter(

        val munged = HashMap<String, AirbyteValue>()
        enriched.declaredFields.forEach { field ->
            val mappedKey =
                columnNameMapper.getMappedColumnName(input.msg.stream, field.key)
                    ?: field.key // fallback to the original key

            val mappedKey = enriched.stream.tableSchema.getFinalColumnName(field.key)
            val mappedValue =
                field.value
                    .let { coercer.map(it) }

@@ -8,11 +8,9 @@ import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.message.DestinationRecordRaw

/**
 * Defines a contract for converting a given input into a structured map representation.
 * Converts raw destination records into a map of final column name to munged final value.
 *
 * This interface provides the blueprint for implementing a conversion process that transforms raw
 * destination record data, partitioning metadata, and optional source records into a map structure
 * with specific key-value pairs.
 * This interface provides the blueprint for different serialization intermediate representations.
 */
interface MediumConverter {
    /**

@@ -4,7 +4,6 @@

package io.airbyte.cdk.load.dataflow.transform.medium

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
import io.airbyte.cdk.load.data.ArrayType
@@ -38,10 +37,8 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
import io.airbyte.cdk.load.dataflow.transform.defaults.NoOpColumnNameMapper
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.load.message.Meta
@@ -57,7 +54,6 @@ import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.concurrent.ConcurrentHashMap
import javax.inject.Singleton

/**
@@ -66,33 +62,12 @@ import javax.inject.Singleton
 */
@Singleton
class ProtobufConverter(
    private val columnNameMapper: ColumnNameMapper,
    private val coercer: ValueCoercer,
    private val validationResultHandler: ValidationResultHandler,
) : MediumConverter {

    private val isNoOpMapper = columnNameMapper is NoOpColumnNameMapper
    private val decoder = AirbyteValueProtobufDecoder()

    private val perStreamMappedNames =
        ConcurrentHashMap<DestinationStream.Descriptor, Array<String>>()

    private fun mappedNamesFor(
        stream: DestinationStream,
        fieldAccessors: Array<FieldAccessor>
    ): Array<String> {
        val key = stream.mappedDescriptor
        return perStreamMappedNames.computeIfAbsent(key) {
            val maxIndex = fieldAccessors.maxOfOrNull { it.index } ?: -1
            val arr = Array(maxIndex + 1) { "" }
            fieldAccessors.forEach { fa ->
                val mapped = columnNameMapper.getMappedColumnName(stream, fa.name) ?: fa.name
                arr[fa.index] = mapped
            }
            arr
        }
    }

    override fun convert(input: ConversionInput): Map<String, AirbyteValue> {
        check(input.msg.rawData is DestinationRecordProtobufSource) {
            "The raw data must be a protobuf source."
@@ -140,12 +115,8 @@ class ProtobufConverter(
            allParsingFailures.addAll(validatedValue.changes)

            if (validatedValue.abValue !is NullValue || validatedValue.type !is UnknownType) {
                val columnName =
                    if (isNoOpMapper) accessor.name
                    else
                        mappedNamesFor(stream, fieldAccessors).getOrElse(accessor.index) {
                            accessor.name
                        }
                // Use column mapping from stream
                val columnName = stream.tableSchema.getFinalColumnName(accessor.name)
                result[columnName] = validatedValue.abValue
            }
        }

@@ -14,7 +14,8 @@ import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.state.CheckpointId
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import java.util.*
import java.util.SequencedMap
import java.util.UUID
import kotlin.collections.LinkedHashMap

data class DestinationRecordRaw(

@@ -0,0 +1,137 @@
/*
 * Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema

import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton

/** Applies destination-specific column name munging logic and handles any naming collisions. */
@Singleton
class ColumnNameResolver(
    private val mapper: TableSchemaMapper,
) {
    private val log = KotlinLogging.logger {}
    /**
     * Creates column name mapping with handling for potential collisions using incremental
     * numbering, with advanced resolution for truncation cases.
     */
    fun getColumnNameMapping(inputColumNames: Set<String>): Map<String, String> {
        val processedColumnNames = mutableSetOf<String>()
        val columnMappings = mutableMapOf<String, String>()

        inputColumNames.forEach { columnName ->
            val processedColumnName = mapper.toColumnName(columnName)

            // Get a unique column name by adding incremental numbers if necessary
            val finalColumnName =
                resolveColumnNameCollision(
                    processedColumnName,
                    existingNames = processedColumnNames,
                    originalColumnName = columnName,
                )

            processedColumnNames.add(finalColumnName)
            columnMappings[columnName] = finalColumnName
        }

        return columnMappings
    }

    /**
     * Resolves column name collisions by first trying incremental suffixes (_1, _2, etc.) If that
     * doesn't work due to name truncation, uses the more powerful superResolveColumnCollisions.
     *
     * @param processedName The name after initial processing by the column name generator
     * @param existingNames Set of names already used for other columns
     * @param originalColumnName The original column name before processing
     */
    private fun resolveColumnNameCollision(
        processedName: String,
        existingNames: Set<String>,
        originalColumnName: String,
    ): String {
        // If processed name is unique, use it
        if (!hasConflict(existingNames, processedName)) {
            return processedName
        }

        log.info { "Detected column name collision for $originalColumnName" }

        // Try adding incremental suffixes until we find a non-colliding name
        var counter = 1
        var candidateName: String
        var previousCandidate = processedName

        do {
            // Generate candidate name by adding numeric suffix
            candidateName = mapper.toColumnName("${originalColumnName}_$counter")

            // Check if we're making progress (detecting potential truncation)
            if (colsConflict(candidateName, previousCandidate)) {
                // We're not making progress, likely due to name truncation
                // Use the more powerful resolution method with the ORIGINAL column name
                return superResolveColumnCollisions(
                    originalColumnName,
                    existingNames,
                    processedName.length,
                )
            }

            previousCandidate = candidateName
            counter++
        } while (existingNames.any { colsConflict(it, candidateName) })

        return candidateName
    }

    /**
     * Generates a name of the format `<prefix><length><suffix>` when simple suffix-based conflict
     * resolution fails due to name truncation. E.g. for affixLength=3: "veryLongName" -> "ver6ame"
     *
     * @param originalName The original column name that caused collision
     * @param existingNames Set of existing column names to avoid collision with
     * @param maximumColumnNameLength The maximum allowed length for the column name
     */
    private fun superResolveColumnCollisions(
        originalName: String,
        existingNames: Set<String>,
        maximumColumnNameLength: Int,
    ): String {
        // Assume that the <length> portion can be expressed in at most 5 characters.
        // If someone is giving us a column name that's longer than 99999 characters,
        // that's just being silly.
        val affixLength = (maximumColumnNameLength - 5) / 2

        // If, after reserving 5 characters for the length, we can't fit the affixes,
        // just give up. That means the destination is trying to restrict us to a
        // 6-character column name, which is just silly.
        if (affixLength <= 0) {
            throw IllegalArgumentException(
                "Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing.",
            )
        }

        val prefix = originalName.take(affixLength)
        val suffix = originalName.substring(originalName.length - affixLength, originalName.length)

        val length = originalName.length - 2 * affixLength
        val newColumnName = mapper.toColumnName("$prefix$length$suffix")

        // If there's still a collision after this, just give up.
        // We could try to be more clever, but this is already a pretty rare case.
        if (hasConflict(existingNames, newColumnName)) {
            throw IllegalArgumentException(
                "Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing.",
            )
        }

        return newColumnName
    }

    fun colsConflict(a: String, b: String): Boolean = mapper.colsConflict(a, b)

    fun hasConflict(existingNames: Set<String>, candidate: String) =
        existingNames.any { colsConflict(it, candidate) }
}
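
To make the collision handling above concrete, here is a minimal, illustrative sketch (not part of the diff) of how `ColumnNameResolver.getColumnNameMapping` behaves when paired with a hypothetical length-limited mapper. The 8-character limit and the `TinyColumnMapper` name are assumptions made only for this example.

```kotlin
// Illustrative only: a hypothetical mapper that lowercases and truncates column
// names to 8 characters, enough to exercise both collision strategies.
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.ColumnNameResolver
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName

class TinyColumnMapper : TableSchemaMapper {
    override fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName(desc.namespace ?: "", desc.name)
    override fun toTempTableName(tableName: TableName) = tableName
    override fun toColumnName(name: String) = name.lowercase().take(8)
    override fun toColumnType(fieldType: FieldType) =
        ColumnType(fieldType.type.toString(), fieldType.nullable)
}

fun main() {
    val resolver = ColumnNameResolver(TinyColumnMapper())
    // "UserName" and "username" collide case-insensitively after mapping; the
    // "_1" suffix makes no progress at 8 characters, so the resolver falls back
    // to the <prefix><length><suffix> scheme described above.
    val mapping = resolver.getColumnNameMapping(setOf("UserName", "username", "id"))
    println(mapping) // {UserName=username, username=u6e, id=id}
}
```
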
@@ -0,0 +1,57 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
import org.apache.commons.codec.digest.DigestUtils

/** Applies destination-specific table name munging logic and handles any naming collisions. */
@Singleton
class TableNameResolver(
    private val mapper: TableSchemaMapper,
) {
    private val log = KotlinLogging.logger {}

    fun getTableNameMapping(
        streamDescriptors: Set<DestinationStream.Descriptor>,
    ): Map<DestinationStream.Descriptor, TableName> {
        val processedFinalTableNames = mutableSetOf<TableName>()

        val result = mutableMapOf<DestinationStream.Descriptor, TableName>()

        streamDescriptors.forEach { desc ->
            val originalFinalTableName = mapper.toFinalTableName(desc)
            val currentFinalProcessedName: TableName

            val finalTableNameColliding = originalFinalTableName in processedFinalTableNames
            if (finalTableNameColliding) {
                log.info { "Detected table name collision for ${desc.namespace}.${desc.name}" }
                // Create a hash-suffixed name to avoid collision
                val hash =
                    DigestUtils.sha1Hex(
                            "${originalFinalTableName.namespace}&airbyte&${desc.name}",
                        )
                        .substring(0, 3)
                val newName = "${desc.name}_$hash"

                currentFinalProcessedName =
                    mapper.toFinalTableName(
                        desc.copy(name = newName),
                    )
                processedFinalTableNames.add(currentFinalProcessedName)
            } else {
                processedFinalTableNames.add(originalFinalTableName)
                currentFinalProcessedName = originalFinalTableName
            }

            result[desc] = currentFinalProcessedName
        }

        return result
    }
}
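
As an illustrative aside (not part of the diff), the sketch below shows the hash-suffix path: a hypothetical mapper folds every stream into one schema, so two streams that differ only by namespace collide and the second receives a short SHA-1 suffix.

```kotlin
// Illustrative only: a hypothetical mapper that folds every stream into a single
// "public" schema, so streams that differ only by namespace collide on table name.
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.TableNameResolver
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName

class SingleSchemaMapper : TableSchemaMapper {
    override fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName("public", desc.name.lowercase())
    override fun toTempTableName(tableName: TableName) =
        tableName.copy(name = "${tableName.name}_tmp")
    override fun toColumnName(name: String) = name
    override fun toColumnType(fieldType: FieldType) =
        ColumnType(fieldType.type.toString(), fieldType.nullable)
}

fun main() {
    val resolver = TableNameResolver(SingleSchemaMapper())
    val mapping =
        resolver.getTableNameMapping(
            setOf(
                DestinationStream.Descriptor(namespace = "dev", name = "Users"),
                DestinationStream.Descriptor(namespace = "prod", name = "users"),
            )
        )
    // The first descriptor keeps "public.users"; the second gets a short
    // sha1-based suffix, e.g. "public.users_a1b" (the exact hash varies).
    mapping.forEach { (desc, table) -> println("$desc -> ${table.namespace}.${table.name}") }
}
```
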
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema

import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import jakarta.inject.Singleton

@Singleton
class TableSchemaFactory(
    private val mapper: TableSchemaMapper,
    private val colNameResolver: ColumnNameResolver,
) {
    fun make(
        finalTableName: TableName,
        inputSchema: Map<String, FieldType>,
        importType: ImportType,
    ): StreamTableSchema {
        val tempTableName = mapper.toTempTableName(finalTableName)
        val tableNames =
            TableNames(
                finalTableName = finalTableName,
                tempTableName = tempTableName,
            )

        val inputToFinalColumnNames = colNameResolver.getColumnNameMapping(inputSchema.keys)
        val finalSchema =
            inputSchema
                .map { inputToFinalColumnNames[it.key]!! to mapper.toColumnType(it.value) }
                .toMap()

        val columnSchema =
            ColumnSchema(
                inputSchema = inputSchema,
                inputToFinalColumnNames = inputToFinalColumnNames,
                finalSchema = finalSchema,
            )

        val tableSchema =
            StreamTableSchema(
                tableNames,
                columnSchema,
                importType,
            )

        return mapper.toFinalSchema(tableSchema)
    }
}
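
For orientation, here is a hedged usage sketch (not part of the diff) of `TableSchemaFactory` wired by hand with the default no-op mapper; in a connector these beans are provided by Micronaut, and the table and column names below are invented.

```kotlin
// Illustrative only: constructing the factory by hand; in a connector these
// beans are provided by Micronaut dependency injection.
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.schema.ColumnNameResolver
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.defaults.NoopTableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName

fun main() {
    val mapper = NoopTableSchemaMapper()
    val factory = TableSchemaFactory(mapper, ColumnNameResolver(mapper))

    val schema =
        factory.make(
            finalTableName = TableName("public", "users"),
            inputSchema =
                mapOf(
                    "id" to FieldType(IntegerType, nullable = false),
                    "age" to FieldType(IntegerType, nullable = true),
                ),
            importType = Append,
        )

    // With the no-op mapper, input and final column names are identical and the
    // temp table name equals the final table name.
    println(schema.tableNames.toPrettyString())
    println(schema.columnSchema.inputToFinalColumnNames) // {id=id, age=age}
}
```
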
@@ -0,0 +1,70 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName

/** Transforms input schema elements to destination-specific naming and type conventions. */
interface TableSchemaMapper {
    /**
     * Converts a stream descriptor to the final destination table name.
     *
     * @param desc The stream descriptor containing namespace and name information
     * @return The mapped final table name in the destination system
     */
    fun toFinalTableName(desc: DestinationStream.Descriptor): TableName

    /**
     * Generates a temporary table name based on the provided final table name. Temporary tables are
     * typically used before data is moved to final tables to avoid data downtime.
     *
     * @param tableName The final table name to base the temporary name on
     * @return The temporary table name
     */
    fun toTempTableName(tableName: TableName): TableName

    /**
     * Transforms a column name from the input schema to comply with destination naming conventions.
     * This may include handling special characters, case transformations, or length limitations.
     *
     * @param name The original column name from the input schema
     * @return The destination-compatible column name
     */
    fun toColumnName(name: String): String

    /**
     * Converts an Airbyte field type to the corresponding destination-specific column type. This
     * handles mapping of data types from Airbyte's type system to the destination database's type
     * system.
     *
     * @param fieldType The Airbyte field type to convert
     * @return The destination-specific column type representation
     */
    fun toColumnType(fieldType: FieldType): ColumnType

    /**
     * Performs any final transformations on the complete table schema before it's used in the
     * destination. By default, returns the schema unchanged. Override to apply destination-specific
     * schema modifications.
     *
     * @param tableSchema The complete stream table schema
     * @return The finalized schema ready for use in the destination
     */
    fun toFinalSchema(tableSchema: StreamTableSchema) = tableSchema

    /**
     * Determines if two column names conflict according to destination-specific rules. By default,
     * performs case-insensitive comparison. Override for different conflict detection logic.
     *
     * @param a First column name
     * @param b Second column name
     * @return true if the column names conflict, false otherwise
     */
    fun colsConflict(a: String, b: String): Boolean = a.equals(b, ignoreCase = true)
}
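
Destinations are expected to supply their own `TableSchemaMapper` implementation. The following Postgres-flavoured sketch is purely hypothetical (not an actual connector's mapper) and assumes the `ColumnType(typeName, nullable)` shape used by `NoopTableSchemaMapper` later in this diff.

```kotlin
// Illustrative only: a hypothetical destination-specific TableSchemaMapper for a
// Postgres-like destination (lowercase identifiers, 63-character limit,
// "_airbyte_tmp" suffix for temp tables). The type mapping is a small subset.
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName
import jakarta.inject.Singleton

@Singleton
class PostgresLikeSchemaMapper : TableSchemaMapper {
    private fun sanitize(identifier: String) =
        identifier.lowercase().replace(Regex("[^a-z0-9_]"), "_").take(63)

    override fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName(sanitize(desc.namespace ?: "public"), sanitize(desc.name))

    override fun toTempTableName(tableName: TableName) =
        tableName.copy(name = sanitize("${tableName.name}_airbyte_tmp"))

    override fun toColumnName(name: String) = sanitize(name)

    override fun toColumnType(fieldType: FieldType) =
        when (fieldType.type) {
            is IntegerType -> ColumnType("bigint", fieldType.nullable)
            else -> ColumnType("jsonb", fieldType.nullable)
        }
}
```
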
@@ -0,0 +1,35 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema.defaults

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton

/**
 * Default schema mapper that performs no transformations on names or types.
 *
 * For destinations that don't do schema munging in the new paradigm.
 */
@Singleton
@Secondary
class NoopTableSchemaMapper : TableSchemaMapper {
    override fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName(desc.namespace ?: "", desc.name)

    override fun toTempTableName(tableName: TableName) = tableName

    override fun toColumnName(name: String) = name

    override fun toColumnType(fieldType: FieldType): ColumnType =
        ColumnType(
            fieldType.type.toString(),
            fieldType.nullable,
        )
}
@@ -0,0 +1,18 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema.model

import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType

/** Defines column mappings and types from source input to destination table schema. */
data class ColumnSchema(
    // schema on input catalog
    val inputSchema: Map<String, FieldType>,
    // column name on input catalog to resolved name
    val inputToFinalColumnNames: Map<String, String>,
    // resolved name to resolved type
    val finalSchema: Map<String, ColumnType>,
)
@@ -0,0 +1,35 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema.model

import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.ImportType

/**
 * Schema information for a stream's table representation resolved for the target destination.
 *
 * Contains everything necessary to perform table operations for the associated stream.
 */
data class StreamTableSchema(
    val tableNames: TableNames,
    val columnSchema: ColumnSchema,
    val importType: ImportType,
) {
    fun getFinalColumnName(rawName: String) = columnSchema.inputToFinalColumnNames[rawName]!!

    /** Note: Returns final munged column names. */
    fun getCursor() =
        if (importType is Dedupe)
            importType.cursor.map { columnSchema.inputToFinalColumnNames[it]!! }
        else emptyList()

    /** Note: Returns final munged column names. */
    fun getPrimaryKey() =
        if (importType is Dedupe)
            importType.primaryKey.map { keys ->
                keys.map { columnSchema.inputToFinalColumnNames[it]!! }
            }
        else emptyList()
}
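
A small, illustrative usage sketch (not part of the diff) of the accessors above with a `Dedupe` import type; the table, columns, and type names are invented for the example.

```kotlin
// Illustrative only: the table, column, and type names below are made up.
// ColumnType's (typeName, nullable) shape follows NoopTableSchemaMapper above.
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames

fun main() {
    val schema =
        StreamTableSchema(
            tableNames = TableNames(finalTableName = TableName("public", "users")),
            columnSchema =
                ColumnSchema(
                    inputSchema =
                        mapOf(
                            "ID" to FieldType(IntegerType, nullable = false),
                            "Updated At" to FieldType(IntegerType, nullable = true),
                        ),
                    inputToFinalColumnNames = mapOf("ID" to "id", "Updated At" to "updated_at"),
                    finalSchema =
                        mapOf(
                            "id" to ColumnType("bigint", false),
                            "updated_at" to ColumnType("bigint", true),
                        ),
                ),
            importType = Dedupe(primaryKey = listOf(listOf("ID")), cursor = listOf("Updated At")),
        )

    println(schema.getFinalColumnName("ID")) // id
    println(schema.getCursor()) // [updated_at]
    println(schema.getPrimaryKey()) // [[id]]
}
```
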
|
||||
@@ -2,7 +2,9 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.table
|
||||
package io.airbyte.cdk.load.schema.model
|
||||
|
||||
import io.airbyte.cdk.load.table.TableSuffixes
|
||||
|
||||
data class TableName(val namespace: String, val name: String) {
|
||||
fun toPrettyString(quote: String = "", suffix: String = "") =
|
||||
@@ -0,0 +1,24 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.schema.model

/** Table names used during different stages of data loading. */
data class TableNames(
    // raw only applies to T+D destinations. Pre-deprecated.
    val rawTableName: TableName? = null,
    val tempTableName: TableName? = null,
    val finalTableName: TableName? = null,
) {
    init {
        check(rawTableName != null || finalTableName != null) {
            "At least one table name should be nonnull"
        }
    }

    fun toPrettyString() =
        "Raw table: ${rawTableName?.toPrettyString()}; " +
            "Temp table: ${tempTableName?.toPrettyString()}; " +
            "Final table: ${finalTableName?.toPrettyString()}"
}
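
The init block guarantees that a raw or final table is always present; a temp table alone is rejected. A small sketch of that contract (the exact rendering of each name depends on TableName.toPrettyString):

val names =
    TableNames(
        finalTableName = TableName("warehouse", "users"),
        tempTableName = TableName("warehouse", "users_tmp"),
    )
// names.toPrettyString() interpolates all three slots, printing null for the absent raw table.

// Throws IllegalStateException("At least one table name should be nonnull"):
// TableNames(tempTableName = TableName("warehouse", "users_tmp"))
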
@@ -4,21 +4,10 @@

package io.airbyte.cdk.load.table

import io.airbyte.cdk.util.invert

/**
 * map from the column name as declared in the schema, to the column name that we'll create in the
 * final (typed) table.
 */
@JvmInline
value class ColumnNameMapping(private val columnNameMapping: Map<String, String>) :
    Map<String, String> by columnNameMapping {
    /**
     * Intended for test use only. If we actually need this at runtime, we probably should only
     * compute the inverse map once.
     */
    // the map is always safe to invert - the entire point of this mapping
    // is that it's 1:1 between original and mapped names.
    // (if any two columns mapped to the same name, then they'd collide in the destination).
    fun originalName(mappedKey: String): String? = columnNameMapping.invert()[mappedKey]
}
    Map<String, String> by columnNameMapping

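Because the value class simply delegates to the backing map, it reads like an ordinary Map keyed by the schema-declared name (the values here are illustrative):

val mapping = ColumnNameMapping(mapOf("User Name" to "user_name", "ID" to "id"))
val finalName = mapping["User Name"] // "user_name"
val hasId = "ID" in mapping          // true, via the Map delegation
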
@@ -4,4 +4,13 @@

package io.airbyte.cdk.load.table

/**
 * CDC meta column names.
 *
 * Note: These CDC column names are brittle: they are declared separately here but must stay in
 * sync with the logic each source uses to generate them. See
 * [io.airbyte.integrations.source.mssql.MsSqlSourceOperations.MsSqlServerCdcMetaFields] for an
 * example.
 */
const val CDC_DELETED_AT_COLUMN = "_ab_cdc_deleted_at"
const val CDC_CURSOR_COLUMN = "_ab_cdc_cursor"

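A hedged usage sketch: destinations that honor CDC deletes typically treat a populated deleted-at meta column as a tombstone. The record-as-map shape below is illustrative, not an API from this PR:

fun isCdcDelete(record: Map<String, Any?>): Boolean = record[CDC_DELETED_AT_COLUMN] != null
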
@@ -2,14 +2,15 @@
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.orchestration.db
package io.airbyte.cdk.load.table

import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableStatus
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
import io.airbyte.cdk.load.table.directload.DirectLoadTableStatus
import java.util.concurrent.ConcurrentHashMap
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.launch
@@ -31,22 +32,21 @@ interface DatabaseInitialStatus
 * ```
 */
fun interface DatabaseInitialStatusGatherer<InitialStatus : DatabaseInitialStatus> {
    suspend fun gatherInitialStatus(streams: TableCatalog): Map<DestinationStream, InitialStatus>
    suspend fun gatherInitialStatus(): Map<DestinationStream, InitialStatus>
}

abstract class BaseDirectLoadInitialStatusGatherer(
    private val tableOperationsClient: TableOperationsClient,
    private val tempTableNameGenerator: TempTableNameGenerator,
    private val catalog: DestinationCatalog,
) : DatabaseInitialStatusGatherer<DirectLoadInitialStatus> {
    override suspend fun gatherInitialStatus(
        streams: TableCatalog
    ): Map<DestinationStream, DirectLoadInitialStatus> {
        val map = ConcurrentHashMap<DestinationStream, DirectLoadInitialStatus>(streams.size)
    override suspend fun gatherInitialStatus(): Map<DestinationStream, DirectLoadInitialStatus> {
        val map =
            ConcurrentHashMap<DestinationStream, DirectLoadInitialStatus>(catalog.streams.size)
        coroutineScope {
            streams.forEach { (stream, tableNameInfo) ->
            catalog.streams.forEach { s ->
                launch {
                    val tableName = tableNameInfo.tableNames.finalTableName!!
                    map[stream] = getInitialStatus(tableName)
                    val tableNames = s.tableSchema.tableNames
                    map[s] = getInitialStatus(tableNames)
                }
            }
        }
@@ -65,10 +65,10 @@ abstract class BaseDirectLoadInitialStatusGatherer(
        }
    }

    private suspend fun getInitialStatus(tableName: TableName): DirectLoadInitialStatus {
    private suspend fun getInitialStatus(names: TableNames): DirectLoadInitialStatus {
        return DirectLoadInitialStatus(
            realTable = getTableStatus(tableName),
            tempTable = getTableStatus(tempTableNameGenerator.generate(tableName)),
            realTable = getTableStatus(names.finalTableName!!),
            tempTable = getTableStatus(names.tempTableName!!),
        )
    }
}
@@ -2,32 +2,16 @@
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.orchestration.db
package io.airbyte.cdk.load.table

import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.TableSuffixes.TMP_TABLE_SUFFIX
import jakarta.inject.Singleton
import org.apache.commons.codec.digest.DigestUtils

data class TableNames(
    // this is pretty dumb, but in theory we could have:
    // * old-style implementation: raw+final tables both exist
    // * only the raw table exists (i.e. T+D disabled)
    // * only the final table exists (i.e. new-style direct-load tables)
    val rawTableName: TableName?,
    val finalTableName: TableName?,
) {
    init {
        check(rawTableName != null || finalTableName != null) {
            "At least one table name should be nonnull"
        }
    }

    fun toPrettyString() =
        "Raw table: ${rawTableName?.toPrettyString()}; Final table: ${finalTableName?.toPrettyString()}"
}

// Commented out so CI won't be big mad
// @Deprecated("Deprecated in favor of TableSchemaMapper")
fun interface TempTableNameGenerator {
    fun generate(originalName: TableName): TableName
}
@@ -39,7 +23,10 @@ fun interface TempTableNameGenerator {
 *
 * T+D destinations simply appended [TMP_TABLE_SUFFIX] to the table name, and should use
 * [TableName.asOldStyleTempTable] instead
 *
 * Not deprecated, but the interface it implements is deprecated.
 */
@Singleton
open class DefaultTempTableNameGenerator(
    private val internalNamespace: String? = null,
    private val affixLength: Int = 8,
@@ -90,6 +77,8 @@ sealed interface TableNameGenerator {

fun interface RawTableNameGenerator : TableNameGenerator

// Commented out so CI won't be big mad
// @Deprecated("Deprecated in favor of TableSchemaMapper")
fun interface FinalTableNameGenerator : TableNameGenerator

fun interface ColumnNameGenerator {
@@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
package io.airbyte.cdk.load.table
|
||||
|
||||
import kotlin.math.max
|
||||
|
||||
@@ -2,9 +2,9 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.direct_load_table
|
||||
package io.airbyte.cdk.load.table.directload
|
||||
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatus
|
||||
import io.airbyte.cdk.load.table.DatabaseInitialStatus
|
||||
|
||||
data class DirectLoadInitialStatus(
|
||||
val realTable: DirectLoadTableStatus?,
|
||||
@@ -2,9 +2,9 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.direct_load_table
|
||||
package io.airbyte.cdk.load.table.directload
|
||||
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
|
||||
data class DirectLoadTableExecutionConfig(
|
||||
val tableName: TableName,
|
||||
@@ -2,15 +2,15 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.direct_load_table
|
||||
package io.airbyte.cdk.load.table.directload
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.component.TableOperationsClient
|
||||
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
|
||||
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.state.StreamProcessingFailed
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.table.TempTableNameGenerator
|
||||
import io.airbyte.cdk.load.write.StreamLoader
|
||||
import io.airbyte.cdk.load.write.StreamStateStore
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
@@ -11,7 +11,10 @@ import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.airbyte.cdk.load.util.deserializeToNode
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
|
||||
@@ -89,25 +92,6 @@ class DestinationCatalogTest {
|
||||
),
|
||||
)
|
||||
|
||||
@Test
|
||||
fun roundTrip() {
|
||||
val streamFactory =
|
||||
DestinationStreamFactory(
|
||||
JsonSchemaToAirbyteType(JsonSchemaToAirbyteType.UnionBehavior.DEFAULT),
|
||||
namespaceMapper = NamespaceMapper()
|
||||
)
|
||||
val catalogFactory = DefaultDestinationCatalogFactory()
|
||||
val destinationCatalog =
|
||||
catalogFactory.getDestinationCatalog(
|
||||
originalCatalog,
|
||||
streamFactory,
|
||||
operation = "write",
|
||||
checkNamespace = null,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
)
|
||||
assertEquals(originalCatalog, destinationCatalog.asProtocolObject())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun proxyOrderedSchema() {
|
||||
val stream =
|
||||
@@ -128,7 +112,23 @@ class DestinationCatalogTest {
|
||||
"x" to FieldType(IntegerType, nullable = true),
|
||||
)
|
||||
),
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = TableNames(finalTableName = TableName("namespace", "name")),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema =
|
||||
linkedMapOf(
|
||||
"z" to FieldType(StringType, nullable = true),
|
||||
"y" to FieldType(BooleanType, nullable = true),
|
||||
"x" to FieldType(IntegerType, nullable = true),
|
||||
),
|
||||
inputToFinalColumnNames = mapOf("z" to "z", "y" to "y", "x" to "x"),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
val expectedOrderedSchema =
|
||||
arrayOf(
|
||||
@@ -158,6 +158,18 @@ class DestinationCatalogTest {
|
||||
includeFiles = false,
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName("default", "foo")),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
),
|
||||
DestinationStream(
|
||||
unmappedNamespace = null,
|
||||
@@ -169,6 +181,18 @@ class DestinationCatalogTest {
|
||||
includeFiles = false,
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName("default", "foo")),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
),
|
||||
)
|
||||
)
|
||||
@@ -193,6 +217,22 @@ class DestinationCatalogTest {
|
||||
includeFiles = false,
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName("default", "foo")),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf("id")),
|
||||
cursor = emptyList()
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -226,6 +266,25 @@ class DestinationCatalogTest {
|
||||
linkedMapOf("id" to FieldType(IntegerType, nullable = true))
|
||||
),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName("default", "foo")),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema =
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, nullable = true)
|
||||
),
|
||||
inputToFinalColumnNames = mapOf("id" to "id"),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf("id")),
|
||||
cursor = listOf("updated_at"),
|
||||
),
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@@ -4,7 +4,13 @@
|
||||
|
||||
package io.airbyte.cdk.load.command
|
||||
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
|
||||
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.airbyte.protocol.models.JsonSchemaType
|
||||
import io.airbyte.protocol.models.v0.CatalogHelpers
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
|
||||
@@ -12,6 +18,7 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode
|
||||
import io.airbyte.protocol.models.v0.Field
|
||||
import io.mockk.every
|
||||
import io.mockk.impl.annotations.MockK
|
||||
import io.mockk.mockk
|
||||
import kotlin.test.assertEquals
|
||||
import kotlin.test.assertFailsWith
|
||||
import kotlin.test.assertNull
|
||||
@@ -51,7 +58,8 @@ class DestinationStreamUTest {
|
||||
fun `test given no destination object name when make then no matching keys`() {
|
||||
val configuredStream = a_configured_stream()
|
||||
|
||||
val stream = a_stream_factory().make(configuredStream)
|
||||
val stream =
|
||||
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
|
||||
|
||||
assertNull(stream.destinationObjectName)
|
||||
assertNull(stream.matchingKey)
|
||||
@@ -69,7 +77,8 @@ class DestinationStreamUTest {
|
||||
)
|
||||
)
|
||||
|
||||
val stream = a_stream_factory().make(configuredStream)
|
||||
val stream =
|
||||
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
|
||||
|
||||
assertEquals(stream.matchingKey, listOf("composite_key_1", "composite_key_2"))
|
||||
assertEquals(stream.destinationObjectName, A_DESTINATION_OBJECT_NAME)
|
||||
@@ -85,7 +94,9 @@ class DestinationStreamUTest {
|
||||
)
|
||||
|
||||
assertFailsWith<IllegalArgumentException>(
|
||||
block = { a_stream_factory().make(configuredStream) }
|
||||
block = {
|
||||
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
@@ -99,15 +110,36 @@ class DestinationStreamUTest {
|
||||
)
|
||||
|
||||
assertFailsWith<IllegalArgumentException>(
|
||||
block = { a_stream_factory().make(configuredStream) }
|
||||
block = {
|
||||
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
private fun a_stream_factory(): DestinationStreamFactory =
|
||||
DestinationStreamFactory(
|
||||
private fun a_stream_factory(): DestinationStreamFactory {
|
||||
val mockSchemaFactory = mockk<TableSchemaFactory>()
|
||||
every { mockSchemaFactory.make(any(), any(), any()) } answers
|
||||
{
|
||||
val finalTableName = firstArg<TableName>()
|
||||
val inputSchema = secondArg<Map<String, FieldType>>()
|
||||
val importType = thirdArg<io.airbyte.cdk.load.command.ImportType>()
|
||||
StreamTableSchema(
|
||||
tableNames = TableNames(finalTableName = finalTableName),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = inputSchema,
|
||||
inputToFinalColumnNames = inputSchema.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = importType,
|
||||
)
|
||||
}
|
||||
return DestinationStreamFactory(
|
||||
JsonSchemaToAirbyteType(JsonSchemaToAirbyteType.UnionBehavior.DEFAULT),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
schemaFactory = mockSchemaFactory
|
||||
)
|
||||
}
|
||||
|
||||
private fun a_configured_stream(): ConfiguredAirbyteStream =
|
||||
ConfiguredAirbyteStream()
|
||||
|
||||
@@ -5,7 +5,11 @@
|
||||
package io.airbyte.cdk.load.command
|
||||
|
||||
import io.airbyte.cdk.load.config.NamespaceDefinitionType
|
||||
import io.mockk.mockk
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
@@ -22,8 +26,20 @@ class NamespaceMapperTest {
|
||||
generationId = 1,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 1,
|
||||
schema = mockk(relaxed = true),
|
||||
namespaceMapper = namespaceMapper
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
namespaceMapper = namespaceMapper,
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName(unmappedNamespace, unmappedName)),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
|
||||
@@ -93,7 +93,23 @@ class AirbyteValueProxyTest {
|
||||
syncId = 1,
|
||||
includeFiles = false,
|
||||
schema = ALL_TYPES_SCHEMA,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName("namespace", "name")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = ALL_TYPES_SCHEMA.properties,
|
||||
inputToFinalColumnNames =
|
||||
ALL_TYPES_SCHEMA.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
private fun ifNull(value: JsonNode?): JsonNode? {
|
||||
|
||||
@@ -26,7 +26,25 @@ class EnrichedDestinationRecordAirbyteValueTest {
|
||||
generationId = 42L,
|
||||
minimumGenerationId = 10L,
|
||||
syncId = 100L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
"test_namespace",
|
||||
"test_stream"
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
private val emittedAtMs = 1234567890L
|
||||
|
||||
@@ -7,6 +7,7 @@ package io.airbyte.cdk.load.dataflow.stages
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.data.StringValue
|
||||
import io.airbyte.cdk.load.dataflow.pipeline.DataFlowStageIO
|
||||
import io.airbyte.cdk.load.dataflow.state.PartitionKey
|
||||
@@ -50,11 +51,29 @@ class ParseStageTest {
|
||||
unmappedNamespace = "test-namespace",
|
||||
unmappedName = "test-stream",
|
||||
importType = Append,
|
||||
schema = io.airbyte.cdk.load.data.ObjectType(linkedMapOf()),
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
generationId = 1L,
|
||||
minimumGenerationId = 1L,
|
||||
syncId = 1L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
"test-namespace",
|
||||
"test-stream"
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
rawRecord =
|
||||
DestinationRecordRaw(
|
||||
|
||||
@@ -8,7 +8,6 @@ import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.DestinationCatalog
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.dataflow.state.stats.EmissionStats
|
||||
import io.airbyte.cdk.output.OutputConsumer
|
||||
import io.airbyte.protocol.models.Jsons
|
||||
@@ -235,11 +234,29 @@ class EmittedStatsStoreImplTest {
|
||||
unmappedNamespace = namespace,
|
||||
unmappedName = name,
|
||||
importType = Append,
|
||||
schema = StringType,
|
||||
schema = io.airbyte.cdk.load.data.ObjectType(linkedMapOf()),
|
||||
generationId = 1L,
|
||||
minimumGenerationId = 1L,
|
||||
syncId = 1L,
|
||||
namespaceMapper = namespaceMapper
|
||||
namespaceMapper = namespaceMapper,
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
namespace ?: "default",
|
||||
name
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,6 @@ import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.data.StringValue
|
||||
import io.airbyte.cdk.load.data.UnionType
|
||||
import io.airbyte.cdk.load.dataflow.state.PartitionKey
|
||||
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
|
||||
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
|
||||
@@ -34,8 +33,6 @@ import org.junit.jupiter.api.extension.ExtendWith
|
||||
|
||||
@ExtendWith(MockKExtension::class)
|
||||
class JsonRecordConversionTest {
|
||||
@MockK lateinit var columnNameMapper: ColumnNameMapper
|
||||
|
||||
@MockK lateinit var valueCoercer: ValueCoercer
|
||||
|
||||
private lateinit var validationResultHandler: ValidationResultHandler
|
||||
@@ -45,16 +42,14 @@ class JsonRecordConversionTest {
|
||||
@BeforeEach
|
||||
fun setup() {
|
||||
validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
jsonConverter = JsonConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
jsonConverter = JsonConverter(valueCoercer, validationResultHandler)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `transforms record into map of munged keys and values`() {
|
||||
// add "_munged" to every key so we can validate we get the mapped cols
|
||||
every { columnNameMapper.getMappedColumnName(any(), any()) } answers
|
||||
{
|
||||
secondArg<String>() + "_munged"
|
||||
}
|
||||
// NOTE: columnNameMapper has been removed from the API
|
||||
// Column name mapping is now handled by the stream's tableSchema
|
||||
// This test has been modified to work with the new API
|
||||
|
||||
every { valueCoercer.validate(any<EnrichedAirbyteValue>()) } returns ValidationResult.Valid
|
||||
|
||||
@@ -87,10 +82,28 @@ class JsonRecordConversionTest {
|
||||
"internal_field_2" to Fixtures.mockCoercedValue(IntegerValue(0)),
|
||||
"internal_field_3" to Fixtures.mockCoercedValue(BooleanValue(true)),
|
||||
)
|
||||
// Mock the stream with tableSchema that provides column name mapping
|
||||
val mockStream =
|
||||
mockk<io.airbyte.cdk.load.command.DestinationStream> {
|
||||
every { tableSchema } returns
|
||||
mockk {
|
||||
every { getFinalColumnName(any()) } answers
|
||||
{
|
||||
val columnName = firstArg<String>()
|
||||
if (columnName.startsWith("user_field")) {
|
||||
"${columnName}_munged"
|
||||
} else {
|
||||
columnName
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
val coerced =
|
||||
mockk<EnrichedDestinationRecordAirbyteValue> {
|
||||
every { declaredFields } answers { userFields }
|
||||
every { airbyteMetaFields } answers { internalFields }
|
||||
every { stream } returns mockStream
|
||||
}
|
||||
|
||||
val input =
|
||||
|
||||
@@ -9,7 +9,6 @@ import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.data.*
|
||||
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
|
||||
import io.airbyte.cdk.load.dataflow.state.PartitionKey
|
||||
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
|
||||
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
|
||||
@@ -49,11 +48,6 @@ class ProtobufConverterTest {
|
||||
every { validate(any()) } returns ValidationResult.Valid
|
||||
}
|
||||
|
||||
private fun createMockMapperPassThrough(): ColumnNameMapper =
|
||||
mockk<ColumnNameMapper> {
|
||||
every { getMappedColumnName(any(), any()) } answers { secondArg<String>() }
|
||||
}
|
||||
|
||||
private fun fa(name: String, type: AirbyteType, idx: Int): FieldAccessor = mockk {
|
||||
every { this@mockk.name } returns name
|
||||
every { this@mockk.type } returns type
|
||||
@@ -134,7 +128,8 @@ class ProtobufConverterTest {
|
||||
source: DestinationRecordProtobufSource = buildProtoSource(emptyList()),
|
||||
generationId: Long = 1L,
|
||||
syncId: Long = 2L,
|
||||
unknownChanges: List<Meta.Change> = emptyList()
|
||||
unknownChanges: List<Meta.Change> = emptyList(),
|
||||
columnNameMapper: ((String) -> String)? = null
|
||||
): DestinationRecordRaw {
|
||||
val destinationStream =
|
||||
mockk<DestinationStream> {
|
||||
@@ -145,6 +140,15 @@ class ProtobufConverterTest {
|
||||
every { mappedDescriptor } returns DestinationStream.Descriptor("namespace", "name")
|
||||
every { unmappedDescriptor } returns
|
||||
DestinationStream.Descriptor("namespace", "name")
|
||||
// Add tableSchema mock
|
||||
every { tableSchema } returns
|
||||
mockk {
|
||||
every { getFinalColumnName(any()) } answers
|
||||
{
|
||||
val columnName = firstArg<String>()
|
||||
columnNameMapper?.invoke(columnName) ?: columnName
|
||||
}
|
||||
}
|
||||
}
|
||||
return mockk<DestinationRecordRaw> {
|
||||
every { stream } returns destinationStream
|
||||
@@ -156,9 +160,8 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata processes basic types correctly`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors =
|
||||
arrayOf(
|
||||
@@ -268,9 +271,8 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata handles BigDecimal values correctly`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors =
|
||||
arrayOf(
|
||||
@@ -311,9 +313,8 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata handles null values`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("null_field", StringType, 0))
|
||||
|
||||
@@ -338,8 +339,7 @@ class ProtobufConverterTest {
|
||||
every { validate(any()) } returns ValidationResult.Valid
|
||||
}
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("time_field", TimeTypeWithoutTimezone, 0))
|
||||
val protoValues = listOf(vTimeNoTz(LocalTime.parse("12:34:56")))
|
||||
@@ -384,9 +384,8 @@ class ProtobufConverterTest {
|
||||
}
|
||||
}
|
||||
}
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("short_string", StringType, 0), fa("long_string", StringType, 1))
|
||||
val protoValues = listOf(vString("hello"), vString("this_is_too_long"))
|
||||
@@ -406,24 +405,26 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata applies column mapping`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper =
|
||||
object : ColumnNameMapper {
|
||||
override fun getMappedColumnName(
|
||||
stream: DestinationStream,
|
||||
columnName: String
|
||||
): String = if (columnName == "original_name") "mapped_name" else columnName
|
||||
}
|
||||
// NOTE: Column name mapping is now handled by the stream's tableSchema
|
||||
// This test has been modified to work with the new API
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("original_name", StringType, 0))
|
||||
val protoValues = listOf(vString("test"))
|
||||
|
||||
val msg =
|
||||
mockMsgWithStream(accessors, source = buildProtoSource(protoValues.map { it.build() }))
|
||||
mockMsgWithStream(
|
||||
accessors,
|
||||
source = buildProtoSource(protoValues.map { it.build() }),
|
||||
columnNameMapper = { columnName ->
|
||||
if (columnName == "original_name") "mapped_name" else columnName
|
||||
}
|
||||
)
|
||||
|
||||
val result = converter.convert(ConversionInput(msg, PartitionKey("test-key")))
|
||||
|
||||
// Column mapping is now handled by tableSchema
|
||||
assertFalse(result.containsKey("original_name"))
|
||||
assertTrue(result.containsKey("mapped_name"))
|
||||
assertEquals("test", (result["mapped_name"] as StringValue).value)
|
||||
@@ -432,9 +433,8 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata handles parsing exceptions`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("invalid_int", IntegerType, 0))
|
||||
|
||||
@@ -457,9 +457,8 @@ class ProtobufConverterTest {
|
||||
@Test
|
||||
fun `convertWithMetadata merges meta changes from source + stream unknown changes + parsing failures`() {
|
||||
val valueCoercer = createMockCoercerPassThrough()
|
||||
val columnNameMapper = createMockMapperPassThrough()
|
||||
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
|
||||
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
|
||||
val accessors = arrayOf(fa("ok_str", StringType, 0), fa("bad_int", IntegerType, 1))
|
||||
|
||||
|
||||
@@ -38,7 +38,6 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
|
||||
import io.airbyte.cdk.load.data.UnionType
|
||||
import io.airbyte.cdk.load.data.UnknownType
|
||||
import io.airbyte.cdk.load.dataflow.state.PartitionKey
|
||||
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
|
||||
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
|
||||
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
|
||||
@@ -83,7 +82,6 @@ class ProtobufRecordConversionTest {
|
||||
private val generationId = 314L
|
||||
|
||||
private lateinit var stream: DestinationStream
|
||||
private lateinit var columnNameMapper: ColumnNameMapper
|
||||
private lateinit var valueCoercer: ValueCoercer
|
||||
private lateinit var validationResultHandler: ValidationResultHandler
|
||||
private var protoSource: DestinationRecordProtobufSource? = null
|
||||
@@ -93,15 +91,7 @@ class ProtobufRecordConversionTest {
|
||||
|
||||
@BeforeEach
|
||||
fun setUp() {
|
||||
columnNameMapper =
|
||||
object : ColumnNameMapper {
|
||||
override fun getMappedColumnName(
|
||||
stream: DestinationStream,
|
||||
columnName: String
|
||||
): String {
|
||||
return "mapped_$columnName"
|
||||
}
|
||||
}
|
||||
// NOTE: Column name mapping is now handled by the stream's tableSchema
|
||||
|
||||
valueCoercer =
|
||||
object : ValueCoercer {
|
||||
@@ -281,6 +271,15 @@ class ProtobufRecordConversionTest {
|
||||
DestinationStream.Descriptor("", "dummy")
|
||||
every { this@mockk.unknownColumnChanges } returns
|
||||
dummyType.computeUnknownColumnChanges()
|
||||
// Add tableSchema mock for column name mapping
|
||||
every { this@mockk.tableSchema } returns
|
||||
mockk {
|
||||
every { getFinalColumnName(any()) } answers
|
||||
{
|
||||
val columnName = firstArg<String>()
|
||||
"mapped_$columnName"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
record =
|
||||
@@ -298,7 +297,7 @@ class ProtobufRecordConversionTest {
|
||||
every { this@mockk.stream } returns this@ProtobufRecordConversionTest.stream
|
||||
}
|
||||
|
||||
converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
|
||||
converter = ProtobufConverter(valueCoercer, validationResultHandler)
|
||||
}
|
||||
|
||||
@AfterEach fun tearDown() = unmockkAll()
|
||||
|
||||
@@ -71,7 +71,25 @@ internal class DestinationMessageTest {
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapper
|
||||
namespaceMapper = namespaceMapper,
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
descriptor.namespace ?: "default",
|
||||
descriptor.name
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
)
|
||||
),
|
||||
@@ -614,6 +632,14 @@ internal class DestinationMessageTest {
|
||||
@Test
|
||||
fun `message factory creates record from protobuf`() {
|
||||
// Note: can't be a mock or `schemaInAirbyteProxyOrder` won't return the correct value
|
||||
val streamSchema =
|
||||
ObjectType(
|
||||
properties =
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, nullable = true),
|
||||
"name" to FieldType(StringType, nullable = true)
|
||||
)
|
||||
)
|
||||
val stream =
|
||||
DestinationStream(
|
||||
unmappedNamespace = "namespace",
|
||||
@@ -622,15 +648,24 @@ internal class DestinationMessageTest {
|
||||
generationId = 1,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 1,
|
||||
schema =
|
||||
ObjectType(
|
||||
properties =
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, nullable = true),
|
||||
"name" to FieldType(StringType, nullable = true)
|
||||
)
|
||||
),
|
||||
namespaceMapper = NamespaceMapper()
|
||||
schema = streamSchema,
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName("namespace", "name")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = streamSchema.properties,
|
||||
inputToFinalColumnNames =
|
||||
streamSchema.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
val catalog = DestinationCatalog(streams = listOf(stream))
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
package io.airbyte.cdk.load.message
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||
import io.airbyte.cdk.load.data.*
|
||||
@@ -41,12 +42,31 @@ class DestinationRecordRawTest {
|
||||
DestinationStream(
|
||||
unmappedNamespace = "test_namespace",
|
||||
unmappedName = "test_stream",
|
||||
io.airbyte.cdk.load.command.Append,
|
||||
Append,
|
||||
recordSchema,
|
||||
generationId = 42L,
|
||||
minimumGenerationId = 0L,
|
||||
syncId = 123L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
"test_namespace",
|
||||
"test_stream"
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = recordSchema.properties,
|
||||
inputToFinalColumnNames =
|
||||
recordSchema.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
@Test
|
||||
@@ -272,12 +292,30 @@ class DestinationRecordRawTest {
|
||||
DestinationStream(
|
||||
unmappedNamespace = "test_namespace",
|
||||
unmappedName = "test_stream",
|
||||
io.airbyte.cdk.load.command.Append,
|
||||
Append,
|
||||
emptySchema,
|
||||
generationId = 42L,
|
||||
minimumGenerationId = 0L,
|
||||
syncId = 123L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
"test_namespace",
|
||||
"test_stream"
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
val jsonData = """{"field1": "value1", "field2": 123}"""
|
||||
@@ -352,12 +390,31 @@ class DestinationRecordRawTest {
|
||||
DestinationStream(
|
||||
unmappedNamespace = "test_namespace",
|
||||
unmappedName = "test_stream",
|
||||
io.airbyte.cdk.load.command.Append,
|
||||
Append,
|
||||
complexSchema,
|
||||
generationId = 42L,
|
||||
minimumGenerationId = 0L,
|
||||
syncId = 123L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName(
|
||||
"test_namespace",
|
||||
"test_stream"
|
||||
)
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = complexSchema.properties,
|
||||
inputToFinalColumnNames =
|
||||
complexSchema.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
val jsonData =
|
||||
|
||||
@@ -52,7 +52,21 @@ class PipelineEventBookkeepingRouterTest {
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName = io.airbyte.cdk.load.schema.model.TableName("ns", "s1")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = io.airbyte.cdk.load.command.Append,
|
||||
)
|
||||
)
|
||||
private val stream2 =
|
||||
DestinationStream(
|
||||
@@ -63,7 +77,21 @@ class PipelineEventBookkeepingRouterTest {
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName = io.airbyte.cdk.load.schema.model.TableName("ns", "s2")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = io.airbyte.cdk.load.command.Append,
|
||||
)
|
||||
)
|
||||
|
||||
private fun router(numDataChannels: Int, markEndOfStreamAtEnd: Boolean = false) =
|
||||
|
||||
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.schema
|
||||
|
||||
import io.mockk.every
|
||||
import io.mockk.impl.annotations.MockK
|
||||
import io.mockk.junit5.MockKExtension
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Assertions.assertThrows
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
|
||||
@ExtendWith(MockKExtension::class)
|
||||
class ColumnNameResolverTest {
|
||||
@MockK private lateinit var mapper: TableSchemaMapper
|
||||
|
||||
@Test
|
||||
fun `handles no collisions`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val columns = setOf("col1", "col2", "col3")
|
||||
|
||||
every { mapper.toColumnName("col1") } returns "col1"
|
||||
every { mapper.toColumnName("col2") } returns "col2"
|
||||
every { mapper.toColumnName("col3") } returns "col3"
|
||||
every { mapper.colsConflict(any(), any()) } returns false
|
||||
|
||||
val result = resolver.getColumnNameMapping(columns)
|
||||
|
||||
assertEquals(3, result.size)
|
||||
assertEquals("col1", result["col1"])
|
||||
assertEquals("col2", result["col2"])
|
||||
assertEquals("col3", result["col3"])
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles simple collision with numeric suffix`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val columns = setOf("name", "Name")
|
||||
|
||||
every { mapper.toColumnName("name") } returns "name"
|
||||
every { mapper.toColumnName("Name") } returns "name"
|
||||
every { mapper.toColumnName("Name_1") } returns "name_1"
|
||||
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
|
||||
|
||||
val result = resolver.getColumnNameMapping(columns)
|
||||
|
||||
assertEquals(2, result.size)
|
||||
assertEquals("name", result["name"])
|
||||
assertEquals("name_1", result["Name"])
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles multiple collisions with incremental suffixes`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val columns = setOf("col", "Col", "COL")
|
||||
|
||||
every { mapper.toColumnName("col") } returns "col"
|
||||
every { mapper.toColumnName("Col") } returns "col"
|
||||
every { mapper.toColumnName("COL") } returns "col"
|
||||
every { mapper.toColumnName("Col_1") } returns "col_1"
|
||||
every { mapper.toColumnName("COL_1") } returns "col_1"
|
||||
every { mapper.toColumnName("COL_2") } returns "col_2"
|
||||
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
|
||||
|
||||
val result = resolver.getColumnNameMapping(columns)
|
||||
|
||||
assertEquals(3, result.size)
|
||||
assertEquals("col", result["col"])
|
||||
assertEquals("col_1", result["Col"])
|
||||
assertEquals("col_2", result["COL"])
|
||||
}
|
||||
|
||||
    // We're testing some internals here, but I think it's important to validate this behavior as it
    // represents an API contract with the destination. Any changes here will potentially affect
    // customer destination schemas.
@Test
|
||||
fun `handles truncation with super resolution`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val shortName = "short"
|
||||
val longName1 = "a".repeat(100)
|
||||
val longName2 = "a".repeat(50)
|
||||
val columns = setOf("short", longName1, longName2)
|
||||
|
||||
every { mapper.toColumnName(shortName) } returns "short"
|
||||
every { mapper.toColumnName(longName1) } returns "truncated"
|
||||
every { mapper.toColumnName(longName2) } returns "truncated"
|
||||
every { mapper.toColumnName("${longName1}_1") } returns "truncated"
|
||||
every { mapper.toColumnName("${longName2}_1") } returns "truncated"
|
||||
every { mapper.toColumnName("aa46aa") } returns "different"
|
||||
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
|
||||
|
||||
val result = resolver.getColumnNameMapping(columns)
|
||||
|
||||
assertEquals(3, result.size)
|
||||
assertEquals("short", result["short"])
|
||||
assertEquals("truncated", result[longName1])
|
||||
assertEquals("different", result[longName2])
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `throws exception when super resolution fails`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val shortName = "short"
|
||||
val longName1 = "a".repeat(100)
|
||||
val longName2 = "a".repeat(50)
|
||||
val columns = setOf("short", longName1, longName2)
|
||||
|
||||
every { mapper.toColumnName(shortName) } returns "short"
|
||||
every { mapper.toColumnName(longName1) } returns "truncated"
|
||||
every { mapper.toColumnName(longName2) } returns "truncated"
|
||||
every { mapper.toColumnName("${longName1}_1") } returns "truncated"
|
||||
every { mapper.toColumnName("${longName2}_1") } returns "truncated"
|
||||
every { mapper.toColumnName("aa46aa") } returns "truncated"
|
||||
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
|
||||
|
||||
assertThrows(IllegalArgumentException::class.java) {
|
||||
resolver.getColumnNameMapping(columns)
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles empty set`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val result = resolver.getColumnNameMapping(emptySet())
|
||||
|
||||
assertEquals(0, result.size)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `preserves original names when no processing needed`() {
|
||||
val resolver = ColumnNameResolver(mapper)
|
||||
val columns = setOf("valid_name_1", "valid_name_2")
|
||||
|
||||
every { mapper.toColumnName("valid_name_1") } returns "valid_name_1"
|
||||
every { mapper.toColumnName("valid_name_2") } returns "valid_name_2"
|
||||
every { mapper.colsConflict(any(), any()) } returns false
|
||||
|
||||
val result = resolver.getColumnNameMapping(columns)
|
||||
|
||||
assertEquals(2, result.size)
|
||||
assertEquals("valid_name_1", result["valid_name_1"])
|
||||
assertEquals("valid_name_2", result["valid_name_2"])
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.schema
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.mockk.every
|
||||
import io.mockk.impl.annotations.MockK
|
||||
import io.mockk.junit5.MockKExtension
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
|
||||
@ExtendWith(MockKExtension::class)
|
||||
class TableNameResolverTest {
|
||||
@MockK private lateinit var mapper: TableSchemaMapper
|
||||
|
||||
@Test
|
||||
fun `handles no collisions`() {
|
||||
val resolver = TableNameResolver(mapper)
|
||||
val desc1 = DestinationStream.Descriptor("namespace1", "stream1")
|
||||
val desc2 = DestinationStream.Descriptor("namespace2", "stream2")
|
||||
val descriptors = setOf(desc1, desc2)
|
||||
|
||||
val table1 = TableName("namespace1", "stream1")
|
||||
val table2 = TableName("namespace2", "stream2")
|
||||
|
||||
every { mapper.toFinalTableName(desc1) } returns table1
|
||||
every { mapper.toFinalTableName(desc2) } returns table2
|
||||
|
||||
val result = resolver.getTableNameMapping(descriptors)
|
||||
|
||||
assertEquals(2, result.size)
|
||||
assertEquals(table1, result[desc1])
|
||||
assertEquals(table2, result[desc2])
|
||||
}
|
||||
|
||||
    // We're testing some internals here, but this behavior represents an external contract with
    // the destination, so it's worth preserving.
@Test
|
||||
fun `handles table name collision with hash suffix`() {
|
||||
val resolver = TableNameResolver(mapper)
|
||||
val desc1 = DestinationStream.Descriptor("namespace", "stream1")
|
||||
val desc2 = DestinationStream.Descriptor("namespace", "stream2")
|
||||
val descriptors = setOf(desc1, desc2)
|
||||
|
||||
val collisionTableName = TableName("namespace", "same_table")
|
||||
val hashedTableName = TableName("namespace", "stream2_hash")
|
||||
|
||||
every { mapper.toFinalTableName(any()) } returnsMany
|
||||
listOf(
|
||||
// call with desc1
|
||||
collisionTableName,
|
||||
// call with desc2
|
||||
collisionTableName,
|
||||
// call with desc2 and hash appended
|
||||
hashedTableName,
|
||||
)
|
||||
|
||||
val result = resolver.getTableNameMapping(descriptors)
|
||||
|
||||
assertEquals(2, result.size)
|
||||
assertEquals(collisionTableName, result[desc1])
|
||||
assertEquals(hashedTableName, result[desc2])
|
||||
}
|
||||
|
||||
// We're testing some internals here but this represents an external API with the destination
|
||||
// so it's worth preserving.
|
||||
@Test
|
||||
fun `handles multiple collisions`() {
|
||||
val resolver = TableNameResolver(mapper)
|
||||
val desc1 = DestinationStream.Descriptor("namespace", "stream1")
|
||||
val desc2 = DestinationStream.Descriptor("namespace", "stream2")
|
||||
val desc3 = DestinationStream.Descriptor("namespace", "stream3")
|
||||
val descriptors = setOf(desc1, desc2, desc3)
|
||||
|
||||
val collisionTableName = TableName("namespace", "same_table")
|
||||
val hashedTable2 = TableName("namespace", "stream2_hash")
|
||||
val hashedTable3 = TableName("namespace", "stream3_hash")
|
||||
|
||||
every { mapper.toFinalTableName(any()) } returnsMany
|
||||
listOf(
|
||||
// call with desc1
|
||||
collisionTableName,
|
||||
// call with desc2
|
||||
collisionTableName,
|
||||
// call with desc2 and hash appended
|
||||
hashedTable2,
|
||||
// call with desc3
|
||||
collisionTableName,
|
||||
// call with desc3 and hash appended
|
||||
hashedTable3,
|
||||
)
|
||||
|
||||
val result = resolver.getTableNameMapping(descriptors)
|
||||
|
||||
assertEquals(3, result.size)
|
||||
assertEquals(collisionTableName, result[desc1])
|
||||
assertEquals(hashedTable2, result[desc2])
|
||||
assertEquals(hashedTable3, result[desc3])
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles empty set`() {
|
||||
val resolver = TableNameResolver(mapper)
|
||||
val result = resolver.getTableNameMapping(emptySet())
|
||||
|
||||
assertEquals(0, result.size)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles single stream`() {
|
||||
val resolver = TableNameResolver(mapper)
|
||||
val desc = DestinationStream.Descriptor("namespace", "stream")
|
||||
val table = TableName("namespace", "stream")
|
||||
|
||||
every { mapper.toFinalTableName(desc) } returns table
|
||||
|
||||
val result = resolver.getTableNameMapping(setOf(desc))
|
||||
|
||||
assertEquals(1, result.size)
|
||||
assertEquals(table, result[desc])
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,105 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.schema
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.command.ImportType
|
||||
import io.airbyte.cdk.load.component.ColumnType
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.mockk.every
|
||||
import io.mockk.impl.annotations.MockK
|
||||
import io.mockk.junit5.MockKExtension
|
||||
import java.util.stream.Stream
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.extension.ExtendWith
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.Arguments
|
||||
import org.junit.jupiter.params.provider.MethodSource
|
||||
|
||||
@ExtendWith(MockKExtension::class)
|
||||
class TableSchemaFactoryTest {
|
||||
@MockK private lateinit var mapper: TableSchemaMapper
|
||||
|
||||
@MockK private lateinit var colNameResolver: ColumnNameResolver
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("schemaTestCases")
|
||||
fun `creates correct StreamTableSchema`(
|
||||
inputSchema: Map<String, FieldType>,
|
||||
importType: ImportType,
|
||||
columnNameMapping: Map<String, String>
|
||||
) {
|
||||
val factory = TableSchemaFactory(mapper, colNameResolver)
|
||||
val finalTableName = TableName("namespace", "table")
|
||||
val tempTableName = TableName("namespace", "table_tmp")
|
||||
|
||||
every { mapper.toTempTableName(finalTableName) } returns tempTableName
|
||||
every { colNameResolver.getColumnNameMapping(inputSchema.keys) } returns columnNameMapping
|
||||
every { mapper.toColumnType(any()) } returns ColumnType("test_type", false)
|
||||
every { mapper.toFinalSchema(any()) } answers { firstArg<StreamTableSchema>() }
|
||||
|
||||
val result = factory.make(finalTableName, inputSchema, importType)
|
||||
|
||||
assertEquals(finalTableName, result.tableNames.finalTableName)
|
||||
assertEquals(tempTableName, result.tableNames.tempTableName)
|
||||
assertEquals(inputSchema, result.columnSchema.inputSchema)
|
||||
assertEquals(columnNameMapping, result.columnSchema.inputToFinalColumnNames)
|
||||
assertEquals(importType, result.importType)
|
||||
|
||||
val expectedFinalSchema =
|
||||
columnNameMapping
|
||||
.map { (_, finalName) ->
|
||||
val columnType = ColumnType("test_type", false)
|
||||
finalName to columnType
|
||||
}
|
||||
.toMap()
|
||||
|
||||
assertEquals(expectedFinalSchema, result.columnSchema.finalSchema)
|
||||
}
|
||||
|
||||
companion object {
|
||||
@JvmStatic
|
||||
fun schemaTestCases(): Stream<Arguments> =
|
||||
Stream.of(
|
||||
Arguments.of(
|
||||
mapOf(
|
||||
"id" to FieldType(IntegerType, false),
|
||||
"name" to FieldType(StringType, false),
|
||||
),
|
||||
Append,
|
||||
mapOf("id" to "id_final", "name" to "name_final")
|
||||
),
|
||||
Arguments.of(
|
||||
mapOf(
|
||||
"id" to FieldType(IntegerType, false),
|
||||
"name" to FieldType(StringType, false),
|
||||
"updated_at" to FieldType(StringType, false),
|
||||
),
|
||||
Dedupe(listOf(listOf("id")), listOf("updated_at")),
|
||||
mapOf("id" to "id", "name" to "name", "updated_at" to "updated_at")
|
||||
),
|
||||
Arguments.of(emptyMap<String, FieldType>(), Append, emptyMap<String, String>()),
|
||||
Arguments.of(
|
||||
mapOf(
|
||||
"id1" to FieldType(IntegerType, false),
|
||||
"id2" to FieldType(IntegerType, false),
|
||||
"data" to FieldType(StringType, false),
|
||||
),
|
||||
Dedupe(listOf(listOf("id1", "id2")), emptyList()),
|
||||
mapOf("id1" to "id1", "id2" to "id2", "data" to "data")
|
||||
),
|
||||
Arguments.of(
|
||||
mapOf("value" to FieldType(StringType, false)),
|
||||
Append,
|
||||
mapOf("value" to "value")
|
||||
)
|
||||
)
|
||||
}
|
||||
}
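// Illustrative sketch (an assumption, not the factory's actual code): the finalSchema the
// test above expects is just each input column pushed through the resolved column-name
// mapping and the mapper's type conversion.
fun deriveFinalSchemaSketch(
    inputSchema: Map<String, FieldType>,
    columnNameMapping: Map<String, String>,
    mapper: TableSchemaMapper,
): Map<String, ColumnType> =
    inputSchema.entries.associate { (inputName, fieldType) ->
        columnNameMapping.getValue(inputName) to mapper.toColumnType(fieldType)
    }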
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.schema.defaults
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.component.ColumnType
|
||||
import io.airbyte.cdk.load.data.BooleanType
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.NumberType
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import java.util.stream.Stream
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.Arguments
|
||||
import org.junit.jupiter.params.provider.MethodSource
|
||||
|
||||
class NoopTableSchemaMapperTest {
|
||||
private val mapper = NoopTableSchemaMapper()
|
||||
|
||||
@Test
|
||||
fun `toFinalTableName returns unchanged table name`() {
|
||||
val desc1 = DestinationStream.Descriptor("namespace", "name")
|
||||
val result1 = mapper.toFinalTableName(desc1)
|
||||
Assertions.assertEquals(TableName("namespace", "name"), result1)
|
||||
|
||||
val desc2 = DestinationStream.Descriptor(null, "name")
|
||||
val result2 = mapper.toFinalTableName(desc2)
|
||||
Assertions.assertEquals(TableName("", "name"), result2)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `toTempTableName returns unchanged table name`() {
|
||||
val tableName = TableName("namespace", "name")
|
||||
val result = mapper.toTempTableName(tableName)
|
||||
Assertions.assertEquals(tableName, result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `toColumnName returns unchanged column name`() {
|
||||
Assertions.assertEquals("column_name", mapper.toColumnName("column_name"))
|
||||
Assertions.assertEquals("UPPERCASE", mapper.toColumnName("UPPERCASE"))
|
||||
Assertions.assertEquals("123_numbers", mapper.toColumnName("123_numbers"))
|
||||
Assertions.assertEquals("special@#chars", mapper.toColumnName("special@#chars"))
|
||||
Assertions.assertEquals("", mapper.toColumnName(""))
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("fieldTypeTestCases")
|
||||
fun `toColumnType maps field types as strings`(
|
||||
fieldType: FieldType,
|
||||
) {
|
||||
val result = mapper.toColumnType(fieldType)
|
||||
Assertions.assertEquals(ColumnType(fieldType.type.toString(), fieldType.nullable), result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles empty and special cases`() {
|
||||
val emptyDesc = DestinationStream.Descriptor("", "")
|
||||
Assertions.assertEquals(TableName("", ""), mapper.toFinalTableName(emptyDesc))
|
||||
|
||||
val emptyTable = TableName("", "")
|
||||
Assertions.assertEquals(emptyTable, mapper.toTempTableName(emptyTable))
|
||||
|
||||
Assertions.assertEquals("", mapper.toColumnName(""))
|
||||
}
|
||||
|
||||
companion object {
|
||||
@JvmStatic
|
||||
fun fieldTypeTestCases(): Stream<Arguments> =
|
||||
Stream.of(
|
||||
Arguments.of(FieldType(StringType, false)),
|
||||
Arguments.of(FieldType(IntegerType, false)),
|
||||
Arguments.of(FieldType(BooleanType, true)),
|
||||
Arguments.of(FieldType(NumberType, false)),
|
||||
)
|
||||
}
|
||||
}
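// Sketch of a mapper consistent with the assertions above, for illustration only. It assumes
// Descriptor exposes nullable `namespace` and `name` properties; the shipped
// NoopTableSchemaMapper may differ in detail.
class NoopTableSchemaMapperSketch {
    fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName(desc.namespace ?: "", desc.name)

    fun toTempTableName(tableName: TableName) = tableName

    fun toColumnName(columnName: String) = columnName

    fun toColumnType(fieldType: FieldType) =
        ColumnType(fieldType.type.toString(), fieldType.nullable)
}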
|
||||
@@ -0,0 +1,249 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.schema.model
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.component.ColumnType
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
class StreamTableSchemaTest {
|
||||
@Test
|
||||
fun `getCursor returns mapped column names for dedupe`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = Fixtures.cursorColumns,
|
||||
inputToFinalColumnNames = Fixtures.cursorColumnMapping,
|
||||
finalSchema = Fixtures.cursorFinalSchema,
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf("id")),
|
||||
cursor = listOf("updated_at", "modified_date"),
|
||||
),
|
||||
)
|
||||
|
||||
val result = streamTableSchema.getCursor()
|
||||
|
||||
assertEquals(listOf("updated_at_final", "modified_date_final"), result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `getCursor returns empty list for append`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf("updated_at" to FieldType(IntegerType, false)),
|
||||
inputToFinalColumnNames = mapOf("updated_at" to "updated_at_final"),
|
||||
finalSchema = mapOf("updated_at_final" to ColumnType("INTEGER", false)),
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType = Append,
|
||||
)
|
||||
|
||||
val result = streamTableSchema.getCursor()
|
||||
|
||||
assertEquals(emptyList<String>(), result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `getPrimaryKey returns mapped column names for dedupe`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = Fixtures.primaryKeyColumns,
|
||||
inputToFinalColumnNames = Fixtures.primaryKeyColumnMapping,
|
||||
finalSchema = Fixtures.primaryKeyFinalSchema,
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf("id"), listOf("user_id", "org_id")),
|
||||
cursor = emptyList()
|
||||
)
|
||||
)
|
||||
|
||||
val result = streamTableSchema.getPrimaryKey()
|
||||
|
||||
assertEquals(listOf(listOf("id_final"), listOf("user_id_final", "org_id_final")), result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `getPrimaryKey returns empty list for append`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf("id" to FieldType(IntegerType, false)),
|
||||
inputToFinalColumnNames = mapOf("id" to "id_final"),
|
||||
finalSchema = mapOf("id_final" to ColumnType("INTEGER", false))
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType = Append,
|
||||
)
|
||||
|
||||
val result = streamTableSchema.getPrimaryKey()
|
||||
|
||||
assertEquals(emptyList<List<String>>(), result)
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `getFinalColumnName returns mapped name`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema =
|
||||
mapOf(
|
||||
"original_name" to FieldType(StringType, false),
|
||||
"another_column" to FieldType(IntegerType, false),
|
||||
),
|
||||
inputToFinalColumnNames =
|
||||
mapOf("original_name" to "mapped_name", "another_column" to "another_mapped"),
|
||||
finalSchema =
|
||||
mapOf(
|
||||
"mapped_name" to ColumnType("STRING", false),
|
||||
"another_mapped" to ColumnType("INTEGER", false)
|
||||
)
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType = Append,
|
||||
)
|
||||
|
||||
assertEquals("mapped_name", streamTableSchema.getFinalColumnName("original_name"))
|
||||
assertEquals("another_mapped", streamTableSchema.getFinalColumnName("another_column"))
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles empty cursor and primary key for dedupe`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = emptyMap(),
|
||||
inputToFinalColumnNames = emptyMap(),
|
||||
finalSchema = emptyMap()
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
|
||||
)
|
||||
|
||||
assertEquals(emptyList<String>(), streamTableSchema.getCursor())
|
||||
assertEquals(emptyList<List<String>>(), streamTableSchema.getPrimaryKey())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `handles complex composite primary key mapping`() {
|
||||
val columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = Fixtures.compositeKeyColumns,
|
||||
inputToFinalColumnNames = Fixtures.compositeKeyColumnMapping,
|
||||
finalSchema = Fixtures.compositeKeyFinalSchema,
|
||||
)
|
||||
|
||||
val streamTableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames = Fixtures.defaultTableNames,
|
||||
columnSchema = columnSchema,
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf("tenant_id", "region_code", "product_id")),
|
||||
cursor = listOf("tenant_id")
|
||||
)
|
||||
)
|
||||
|
||||
assertEquals(
|
||||
listOf(listOf("TENANT_ID", "REGION_CODE", "PRODUCT_ID")),
|
||||
streamTableSchema.getPrimaryKey()
|
||||
)
|
||||
assertEquals(listOf("TENANT_ID"), streamTableSchema.getCursor())
|
||||
}
|
||||
|
||||
object Fixtures {
|
||||
val defaultTableName = TableName("namespace", "table")
|
||||
val defaultTableNames = TableNames(finalTableName = defaultTableName)
|
||||
|
||||
val cursorColumns =
|
||||
mapOf(
|
||||
"updated_at" to FieldType(IntegerType, false),
|
||||
"modified_date" to FieldType(StringType, false),
|
||||
)
|
||||
|
||||
val cursorColumnMapping =
|
||||
mapOf(
|
||||
"updated_at" to "updated_at_final",
|
||||
"modified_date" to "modified_date_final",
|
||||
)
|
||||
|
||||
val cursorFinalSchema =
|
||||
mapOf(
|
||||
"updated_at_final" to ColumnType("INTEGER", false),
|
||||
"modified_date_final" to ColumnType("STRING", false),
|
||||
)
|
||||
|
||||
val primaryKeyColumns =
|
||||
mapOf(
|
||||
"id" to FieldType(IntegerType, false),
|
||||
"user_id" to FieldType(IntegerType, false),
|
||||
"org_id" to FieldType(IntegerType, false),
|
||||
)
|
||||
|
||||
val primaryKeyColumnMapping =
|
||||
mapOf(
|
||||
"id" to "id_final",
|
||||
"user_id" to "user_id_final",
|
||||
"org_id" to "org_id_final",
|
||||
)
|
||||
|
||||
val primaryKeyFinalSchema =
|
||||
mapOf(
|
||||
"id_final" to ColumnType("INTEGER", false),
|
||||
"user_id_final" to ColumnType("INTEGER", false),
|
||||
"org_id_final" to ColumnType("INTEGER", false),
|
||||
)
|
||||
|
||||
val compositeKeyColumns =
|
||||
mapOf(
|
||||
"tenant_id" to FieldType(IntegerType, false),
|
||||
"region_code" to FieldType(StringType, false),
|
||||
"product_id" to FieldType(IntegerType, false),
|
||||
)
|
||||
|
||||
val compositeKeyColumnMapping =
|
||||
mapOf(
|
||||
"tenant_id" to "TENANT_ID",
|
||||
"region_code" to "REGION_CODE",
|
||||
"product_id" to "PRODUCT_ID",
|
||||
)
|
||||
|
||||
val compositeKeyFinalSchema =
|
||||
mapOf(
|
||||
"TENANT_ID" to ColumnType("INTEGER", false),
|
||||
"REGION_CODE" to ColumnType("STRING", false),
|
||||
"PRODUCT_ID" to ColumnType("INTEGER", false),
|
||||
)
|
||||
}
|
||||
}
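// Illustrative sketch of the mapping behavior these tests pin down (an assumption about the
// real getCursor/getPrimaryKey, written here as free functions over the public API asserted
// above: importType, getFinalColumnName, and Dedupe's cursor/primaryKey).
fun sketchGetCursor(schema: StreamTableSchema): List<String> =
    when (val importType = schema.importType) {
        is Dedupe -> importType.cursor.map { schema.getFinalColumnName(it) }
        else -> emptyList()
    }

fun sketchGetPrimaryKey(schema: StreamTableSchema): List<List<String>> =
    when (val importType = schema.importType) {
        is Dedupe -> importType.primaryKey.map { key -> key.map { schema.getFinalColumnName(it) } }
        else -> emptyList()
    }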
|
||||
@@ -43,7 +43,22 @@ class CheckpointManagerUTest {
|
||||
generationId = 10L,
|
||||
minimumGenerationId = 10L,
|
||||
syncId = 101L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName("test", "stream1")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
private val stream2 =
|
||||
@@ -55,7 +70,22 @@ class CheckpointManagerUTest {
|
||||
generationId = 10L,
|
||||
minimumGenerationId = 10L,
|
||||
syncId = 101L,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
io.airbyte.cdk.load.schema.model.StreamTableSchema(
|
||||
tableNames =
|
||||
io.airbyte.cdk.load.schema.model.TableNames(
|
||||
finalTableName =
|
||||
io.airbyte.cdk.load.schema.model.TableName("test", "stream2")
|
||||
),
|
||||
columnSchema =
|
||||
io.airbyte.cdk.load.schema.model.ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
@BeforeEach
|
||||
|
||||
@@ -8,6 +8,10 @@ import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.micronaut.context.annotation.Factory
|
||||
import io.micronaut.context.annotation.Primary
|
||||
import io.micronaut.context.annotation.Requires
|
||||
@@ -18,8 +22,21 @@ import jakarta.inject.Singleton
|
||||
* `@MicronautTest(environments = [ ..., MockDestinationCatalog])`.
|
||||
*/
|
||||
@Factory
|
||||
class MockDestinationCatalogFactory : DestinationCatalogFactory {
|
||||
class MockDestinationCatalogFactory {
|
||||
companion object {
|
||||
val tableNames = TableNames(finalTableName = TableName("test", "stream"))
|
||||
val tableSchema =
|
||||
StreamTableSchema(
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
tableNames = tableNames,
|
||||
)
|
||||
|
||||
val stream1 =
|
||||
DestinationStream(
|
||||
unmappedNamespace = "test",
|
||||
@@ -36,7 +53,8 @@ class MockDestinationCatalogFactory : DestinationCatalogFactory {
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
val stream2 =
|
||||
DestinationStream(
|
||||
@@ -54,14 +72,15 @@ class MockDestinationCatalogFactory : DestinationCatalogFactory {
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
}
|
||||
|
||||
@Singleton
|
||||
@Primary
|
||||
@Requires(env = ["MockDestinationCatalog"])
|
||||
override fun make(): DestinationCatalog {
|
||||
fun make(): DestinationCatalog {
|
||||
return DestinationCatalog(streams = listOf(stream1, stream2))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||
import io.airbyte.cdk.load.data.*
|
||||
import io.airbyte.cdk.load.dataflow.aggregate.AggregateFactory
|
||||
import io.airbyte.cdk.load.dataflow.aggregate.StoreKey
|
||||
import io.airbyte.cdk.load.dataflow.state.PartitionKey
|
||||
import io.airbyte.cdk.load.dataflow.transform.RecordDTO
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.airbyte.cdk.load.write.DestinationWriter
|
||||
import java.util.UUID
|
||||
import kotlinx.coroutines.runBlocking
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
|
||||
/**
* Validates basic Micronaut DI wiring and write path functionality.
*
* Tests:
* 1. all beans are injectable - Catches missing @Singleton, circular dependencies, missing beans
* 2. writer setup completes - Validates namespace creation and status gathering
* 3. can create append stream loader - Validates StreamLoader instantiation
* 4. stream loader start creates table - Validates table creation
* 5. can write one record - Full write path validation (most important)
*
* Setup:
* 1. Add testFixtures dependency to build.gradle
* 2. Create application-component.yml with airbyte.connector.operation="write"
* 3. Provide @Primary ConfiguredAirbyteCatalog bean (use DefaultComponentTestCatalog.make())
* 4. Start database in @BeforeAll (testcontainer or real instance)
* 5. If Writer requires catalog streams: inject DestinationCatalog and override createTestStream()
*
* Troubleshooting:
* - DI errors = test working correctly, add missing beans
* - "Property doesn't exist" = missing application-component.yml
* - "Catalog must have at least one stream" = missing ConfiguredAirbyteCatalog bean
* - NullPointerException in createStreamLoader = override createTestStream()
*/
interface ConnectorWiringSuite {
|
||||
|
||||
// Required: Provided by connector test via Micronaut injection
|
||||
val writer: DestinationWriter
|
||||
val client: TableOperationsClient
|
||||
val aggregateFactory: AggregateFactory
|
||||
|
||||
// Optional: Override to provide custom test namespace (defaults to "test")
|
||||
val testNamespace: String
|
||||
get() = "test"
|
||||
|
||||
/**
|
||||
* Test: All core beans are injectable without DI errors.
|
||||
*
|
||||
* Validates that Micronaut can create all required beans:
|
||||
* - DestinationWriter
|
||||
* - TableOperationsClient
|
||||
* - AggregateFactory
|
||||
*
|
||||
* This catches missing @Singleton annotations, circular dependencies, and missing bean
|
||||
* definitions.
|
||||
*/
|
||||
fun `all beans are injectable`() {
|
||||
assertNotNull(writer, "DestinationWriter should be injectable")
|
||||
assertNotNull(client, "TableOperationsClient should be injectable")
|
||||
assertNotNull(aggregateFactory, "AggregateFactory should be injectable")
|
||||
}
|
||||
|
||||
/**
|
||||
* Test: Writer.setup() executes without errors.
|
||||
*
|
||||
* Validates:
|
||||
* - Namespace creation works
|
||||
* - Initial status gathering works
|
||||
* - No crashes during setup phase
|
||||
*/
|
||||
fun `writer setup completes`() = runBlocking {
|
||||
// Should not throw
|
||||
writer.setup()
|
||||
}
|
||||
|
||||
/**
|
||||
* Test: Writer can create StreamLoader for append mode.
|
||||
*
|
||||
* Validates:
|
||||
* - Writer.createStreamLoader() returns non-null
|
||||
* - StreamLoader instantiation doesn't crash
|
||||
* - Append mode is supported
|
||||
*/
|
||||
fun `can create append stream loader`() = runBlocking {
|
||||
writer.setup()
|
||||
val stream = createTestStream(importType = Append)
|
||||
val loader = writer.createStreamLoader(stream)
|
||||
assertNotNull(loader, "StreamLoader should be created for append mode")
|
||||
}
|
||||
|
||||
/**
|
||||
* Test: StreamLoader.start() creates tables.
|
||||
*
|
||||
* Validates:
|
||||
* - StreamLoader.start() runs without error
|
||||
* - Table is created in database
|
||||
* - Table can be queried
|
||||
*/
|
||||
fun `stream loader start creates table`() = runBlocking {
|
||||
writer.setup()
|
||||
val stream = createTestStream()
|
||||
val tableName = TableName(testNamespace, stream.mappedDescriptor.name)
|
||||
|
||||
try {
|
||||
val loader = writer.createStreamLoader(stream)
|
||||
|
||||
// Start should create table
|
||||
loader.start()
|
||||
|
||||
// Verify table exists
|
||||
assertTrue(
|
||||
client.tableExists(tableName),
|
||||
"Table ${tableName} should exist after StreamLoader.start()"
|
||||
)
|
||||
} finally {
|
||||
// Cleanup
|
||||
client.dropTable(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Test: Write one record using StreamLoader (validates full write path).
*
* This is the most important test; it validates the complete write path:
* - Sets up writer (namespace creation, initial status gathering)
* - Creates StreamLoader (which creates table)
* - Writes one record through aggregate/buffer
* - Verifies data appears in database
*
* If this test passes, your write path works end-to-end!
*/
fun `can write one record`() = runBlocking {
|
||||
writer.setup()
|
||||
val stream = createTestStream()
|
||||
val tableName = TableName(testNamespace, stream.mappedDescriptor.name)
|
||||
|
||||
try {
|
||||
// 1. Create namespace
|
||||
client.createNamespace(testNamespace)
|
||||
|
||||
// 2. Create and start StreamLoader
|
||||
val loader = writer.createStreamLoader(stream)
|
||||
loader.start()
|
||||
|
||||
// 3. Create aggregate for this stream
|
||||
val key = createStoreKey(stream)
|
||||
val aggregate = aggregateFactory.create(key)
|
||||
|
||||
// 4. Write one record
|
||||
val record = createTestRecord()
|
||||
aggregate.accept(record)
|
||||
aggregate.flush()
|
||||
|
||||
// 5. Verify data in database
|
||||
val count = client.countTable(tableName)
|
||||
assertEquals(1L, count, "Should have exactly 1 record after write. Got $count records.")
|
||||
|
||||
// 6. Close loader
|
||||
loader.close(hadNonzeroRecords = true, streamFailure = null)
|
||||
} finally {
|
||||
// Cleanup
|
||||
client.dropTable(tableName)
|
||||
}
|
||||
}
|
||||
|
||||
// ========== Helper Methods ==========
|
||||
|
||||
/**
|
||||
* Creates a minimal test stream for validation. Override this if you need custom stream
|
||||
* configuration.
|
||||
*/
|
||||
fun createTestStream(
|
||||
namespace: String = "test",
|
||||
name: String = "test_stream_${UUID.randomUUID()}",
|
||||
importType: io.airbyte.cdk.load.command.ImportType = Append
|
||||
): DestinationStream {
|
||||
return DestinationStream(
|
||||
unmappedNamespace = namespace,
|
||||
unmappedName = name,
|
||||
importType = importType,
|
||||
schema =
|
||||
ObjectType(
|
||||
properties =
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, nullable = false),
|
||||
"name" to FieldType(StringType, nullable = true)
|
||||
)
|
||||
),
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = NamespaceMapper(), // Default identity mapper
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a StoreKey for the given stream. Used to retrieve aggregate from factory.
|
||||
*
|
||||
* Note: StoreKey is a typealias for DestinationStream.Descriptor
|
||||
*/
|
||||
fun createStoreKey(stream: DestinationStream): StoreKey {
|
||||
// StoreKey = DestinationStream.Descriptor
|
||||
return stream.mappedDescriptor
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a simple column name mapping for test stream. Maps column names to themselves
|
||||
* (identity mapping). Override if your database requires name transformation.
|
||||
*/
|
||||
fun createSimpleColumnMapping(): io.airbyte.cdk.load.table.ColumnNameMapping {
|
||||
return io.airbyte.cdk.load.table.ColumnNameMapping(mapOf("id" to "id", "name" to "name"))
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a test record with all required Airbyte metadata columns. Override this if you need
|
||||
* custom record structure.
|
||||
*/
|
||||
fun createTestRecord(): RecordDTO {
|
||||
return RecordDTO(
|
||||
fields =
|
||||
mapOf(
|
||||
// User columns
|
||||
"id" to IntegerValue(1),
|
||||
"name" to StringValue("Alice"),
|
||||
// Airbyte metadata columns (required)
|
||||
"_airbyte_raw_id" to StringValue(UUID.randomUUID().toString()),
|
||||
"_airbyte_extracted_at" to TimestampWithTimezoneValue("2024-01-01T00:00:00Z"),
|
||||
"_airbyte_meta" to ObjectValue(linkedMapOf()),
|
||||
"_airbyte_generation_id" to IntegerValue(0)
|
||||
),
|
||||
partitionKey = PartitionKey(""), // Empty partition for non-partitioned streams
|
||||
sizeBytes = 100,
|
||||
emittedAtMs = System.currentTimeMillis()
|
||||
)
|
||||
}
|
||||
}
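// Hypothetical connector-side usage of the suite above (the class name, the "component"
// environment, and the elided JUnit/Micronaut imports are placeholders/assumptions; see the
// Setup notes in the interface KDoc). Each wrapper delegates to an inherited check so JUnit
// discovers it as a test.
@MicronautTest(environments = ["component"])
class MyDestinationWiringTest(
    override val writer: DestinationWriter,
    override val client: TableOperationsClient,
    override val aggregateFactory: AggregateFactory,
) : ConnectorWiringSuite {
    @Test
    fun beansAreInjectable() {
        `all beans are injectable`()
    }

    @Test
    fun writerSetupCompletes() {
        `writer setup completes`()
    }

    @Test
    fun canWriteOneRecord() {
        `can write one record`()
    }
}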
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import com.fasterxml.jackson.databind.node.JsonNodeFactory
|
||||
import io.airbyte.protocol.models.v0.AirbyteStream
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
|
||||
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
|
||||
import io.airbyte.protocol.models.v0.DestinationSyncMode
|
||||
import io.airbyte.protocol.models.v0.SyncMode
|
||||
|
||||
/**
* Utility for creating a default ConfiguredAirbyteCatalog for component tests.
*
* Provides a catalog with schema matching ConnectorWiringSuite.createTestRecord():
* - id: integer
* - name: string
* - Airbyte metadata columns
*
* Usage in connector test config factory:
* @Singleton @Primary fun catalog() = DefaultComponentTestCatalog.make()
*/
object DefaultComponentTestCatalog {
|
||||
fun make(): ConfiguredAirbyteCatalog {
|
||||
val jsonNodeFactory = JsonNodeFactory.instance
|
||||
val schema =
|
||||
jsonNodeFactory.objectNode().apply {
|
||||
put("type", "object")
|
||||
set<Nothing>(
|
||||
"properties",
|
||||
jsonNodeFactory.objectNode().apply {
|
||||
set<Nothing>(
|
||||
"id",
|
||||
jsonNodeFactory.objectNode().apply { put("type", "integer") }
|
||||
)
|
||||
set<Nothing>(
|
||||
"name",
|
||||
jsonNodeFactory.objectNode().apply { put("type", "string") }
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
val stream =
|
||||
AirbyteStream()
|
||||
.withName("test_stream")
|
||||
.withNamespace("test")
|
||||
.withJsonSchema(schema)
|
||||
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH))
|
||||
.withSourceDefinedCursor(false)
|
||||
.withSourceDefinedPrimaryKey(emptyList())
|
||||
|
||||
val configuredStream =
|
||||
ConfiguredAirbyteStream()
|
||||
.withStream(stream)
|
||||
.withSyncMode(SyncMode.FULL_REFRESH)
|
||||
.withDestinationSyncMode(DestinationSyncMode.APPEND)
|
||||
.withCursorField(emptyList())
|
||||
.withPrimaryKey(emptyList())
|
||||
.withGenerationId(0L)
|
||||
.withMinimumGenerationId(0L)
|
||||
.withSyncId(42L)
|
||||
|
||||
return ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream))
|
||||
}
|
||||
}
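// Example of the usage described in the KDoc above, in a hypothetical connector config
// factory (the factory class name is a placeholder; @Factory/@Singleton/@Primary are the
// Micronaut annotations already used in this module, imports elided).
@Factory
class MyComponentTestConfigFactory {
    @Singleton
    @Primary
    fun catalog(): ConfiguredAirbyteCatalog = DefaultComponentTestCatalog.make()
}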
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.NamespaceMapper
|
||||
import io.airbyte.cdk.load.data.AirbyteValue
|
||||
@@ -30,11 +28,14 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.util.Jsons
|
||||
import io.airbyte.cdk.util.invert
|
||||
import java.util.UUID
|
||||
import org.junit.jupiter.api.Assertions
|
||||
|
||||
/**
|
||||
* Common test fixtures and constants used across table operations test suites. Provides reusable
|
||||
@@ -44,6 +45,7 @@ object TableOperationsFixtures {
|
||||
// Common field names
|
||||
const val TEST_FIELD = "test"
|
||||
const val ID_FIELD = "id"
|
||||
const val DESCRIPTION_FIELD = "description"
|
||||
|
||||
// Common schemas
|
||||
val TEST_INTEGER_SCHEMA = ObjectType(linkedMapOf(TEST_FIELD to FieldType(IntegerType, true)))
|
||||
@@ -63,6 +65,7 @@ object TableOperationsFixtures {
|
||||
ID_FIELD to FieldType(StringType, true),
|
||||
TEST_FIELD to FieldType(IntegerType, true),
|
||||
CDC_DELETED_AT_COLUMN to FieldType(IntegerType, true),
|
||||
DESCRIPTION_FIELD to FieldType(StringType, true),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -113,6 +116,7 @@ object TableOperationsFixtures {
|
||||
ID_FIELD to ID_FIELD,
|
||||
TEST_FIELD to TEST_FIELD,
|
||||
CDC_DELETED_AT_COLUMN to CDC_DELETED_AT_COLUMN,
|
||||
DESCRIPTION_FIELD to DESCRIPTION_FIELD,
|
||||
),
|
||||
)
|
||||
|
||||
@@ -198,12 +202,16 @@ object TableOperationsFixtures {
|
||||
val UPSERT_SOURCE_RECORDS: List<Map<String, AirbyteValue>> =
|
||||
listOf(
|
||||
inputRecord(
|
||||
"5499cdef-1411-4c7e-987c-b22fe1284a49",
|
||||
"109d38b9-e001-4f62-86ce-4a457ab013a1",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("2"),
|
||||
TEST_FIELD to IntegerValue(1001),
|
||||
ID_FIELD to StringValue("0"),
|
||||
TEST_FIELD to IntegerValue(1000),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"New record, no existing record. Upsert should insert this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"295eb05d-da91-4cf5-8d26-a2bf8b6e8ef7",
|
||||
@@ -213,24 +221,22 @@ object TableOperationsFixtures {
|
||||
ID_FIELD to StringValue("3"),
|
||||
TEST_FIELD to IntegerValue(1002),
|
||||
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"New deletion record with later cursor and extracted_at than existing record. Upsert should delete the existing record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"9110dcf0-2171-4daa-a934-695163950d98",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("4"),
|
||||
TEST_FIELD to IntegerValue(4),
|
||||
),
|
||||
// There are two records with id=5, which differ only in extracted_at.
|
||||
// The second record has non-null deleted_at, so we expect the record to be deleted.
|
||||
inputRecord(
|
||||
"35295b83-302f-49c3-af0f-cf093bc46def",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("5"),
|
||||
TEST_FIELD to IntegerValue(1004),
|
||||
TEST_FIELD to IntegerValue(5),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with no existing record, but there's a second incoming deletion record with later extracted_at. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"5773cf6f-f8b7-48f2-8f23-728a4a4eb56d",
|
||||
@@ -238,8 +244,155 @@ object TableOperationsFixtures {
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("5"),
|
||||
TEST_FIELD to IntegerValue(1005),
|
||||
TEST_FIELD to IntegerValue(5),
|
||||
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue("Incoming deletion record. This record should be discarded."),
|
||||
),
|
||||
inputRecord(
|
||||
"1c4d0fc5-1e1e-4f7e-87c8-a46a722ee984",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("6"),
|
||||
TEST_FIELD to IntegerValue(6),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with no existing record, but there's a second incoming record with later extracted_at. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"2ddf5ee9-08a1-4319-824d-187d878edac5",
|
||||
"2025-01-23T01:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("6"),
|
||||
TEST_FIELD to IntegerValue(6),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with no existing record. Upsert should insert this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"e8379b8f-e437-4d55-9d16-76f5e6e942d6",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("7"),
|
||||
TEST_FIELD to IntegerValue(7),
|
||||
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming deletion record, but there's a second incoming record with later extracted_at. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"e56fc753-b55a-439b-9b16-528596e2ca3a",
|
||||
"2025-01-23T01:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("7"),
|
||||
TEST_FIELD to IntegerValue(7),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with no existing record. Upsert should insert this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"645efad2-f1e6-438a-b29f-15ae5d096015",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("8"),
|
||||
TEST_FIELD to IntegerValue(8),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with earlier cursor and later extracted_at than existing record. Upsert should discard this record (prefer cursor over extracted_at)."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"f74b8ddb-45d0-4e30-af25-66885e57a0e6",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("9"),
|
||||
TEST_FIELD to IntegerValue(9),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with equal cursor and later extracted_at than existing record. Upsert should update with this record (break ties with extracted_at)."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"877cceb6-23a6-4e7b-92e3-59ca46f8fd6c",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("10"),
|
||||
TEST_FIELD to IntegerValue(1010),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with later cursor and later extracted_at than existing record. Upsert should update with this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"20410b34-7bb0-4ba5-9c61-0dd23bfeee6d",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("11"),
|
||||
TEST_FIELD to IntegerValue(11),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with earlier cursor and equal extracted_at than existing record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"70fdf9b0-ade0-4d30-9131-ba217ef506da",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("12"),
|
||||
TEST_FIELD to IntegerValue(1012),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with later cursor and equal extracted_at than existing record. Upsert should update with this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"20949d9b-8ffc-4497-85e4-cda14abc4049",
|
||||
"2025-01-21T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("13"),
|
||||
TEST_FIELD to IntegerValue(13),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with earlier cursor and earlier extracted_at than existing record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"5808a0ef-3c6d-4d9a-851c-edbbc4852e18",
|
||||
"2025-01-21T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("14"),
|
||||
TEST_FIELD to IntegerValue(14),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with equal cursor and earlier extracted_at than existing record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"373127a7-a40e-4e23-890b-1a52114686ee",
|
||||
"2025-01-21T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("15"),
|
||||
TEST_FIELD to IntegerValue(1015),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Incoming record with later cursor and earlier extracted_at than existing record. Upsert should update with this record."
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
@@ -249,7 +402,6 @@ object TableOperationsFixtures {
|
||||
*/
|
||||
val UPSERT_TARGET_RECORDS: List<Map<String, AirbyteValue>> =
|
||||
listOf(
|
||||
// id=1 has no incoming record, so it should remain untouched.
|
||||
inputRecord(
|
||||
"6317026e-12f9-4713-976e-ce43901bd7ce",
|
||||
"2025-01-22T00:00:00Z",
|
||||
@@ -257,18 +409,11 @@ object TableOperationsFixtures {
|
||||
1,
|
||||
ID_FIELD to StringValue("1"),
|
||||
TEST_FIELD to IntegerValue(1),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record, no incoming record. Upsert should preserve this record."
|
||||
),
|
||||
),
|
||||
// id=2 has a normal incoming record, which will overwrite this one.
|
||||
inputRecord(
|
||||
"46159e3a-9bf9-42d9-8bb7-9f47d37bd663",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("2"),
|
||||
TEST_FIELD to IntegerValue(2),
|
||||
),
|
||||
// id=3 has an incoming record with nonnull deleted_at, so this record should be
|
||||
// deleted.
|
||||
// TODO what about destinations with CDC soft deletes?
|
||||
// https://github.com/airbytehq/airbyte-internal-issues/issues/14911
|
||||
inputRecord(
|
||||
@@ -278,22 +423,121 @@ object TableOperationsFixtures {
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("3"),
|
||||
TEST_FIELD to IntegerValue(3),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with incoming deletion record with later cursor and extracted_at. Upsert should delete this record."
|
||||
),
|
||||
),
|
||||
// id=4 has an incoming record with the same cursor value (test=4) but later
|
||||
// extracted_at.
|
||||
// That record should replace this one.
|
||||
inputRecord(
|
||||
"02e22e03-587f-4d30-9718-994357407b65",
|
||||
"8086bdd6-6cf5-479e-a819-e5f347373804",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("4"),
|
||||
TEST_FIELD to IntegerValue(4),
|
||||
ID_FIELD to StringValue("8"),
|
||||
TEST_FIELD to IntegerValue(1008),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with later cursor and earlier extracted_at than incoming record. Upsert should preserve this record (prefer cursor over extracted_at)."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"b60e8b33-32f4-4da0-934b-87d14d9ed354",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("9"),
|
||||
TEST_FIELD to IntegerValue(9),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with equal cursor and earlier extracted_at than incoming record. Upsert should discard this record (break ties with extracted_at)."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"e79d163e-b594-4016-89b9-a85e385778bd",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("10"),
|
||||
TEST_FIELD to IntegerValue(10),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with earlier cursor and earlier extracted_at than incoming record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"3d345fb2-254e-4968-89a6-f896a05fb831",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("11"),
|
||||
TEST_FIELD to IntegerValue(1011),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with later cursor and equal extracted_at than incoming record. Upsert should preserve this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"9c5262e6-44e3-41de-9a5a-c31bc0efdb68",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("12"),
|
||||
TEST_FIELD to IntegerValue(12),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with earlier cursor and equal extracted_at than incoming record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"739a9347-267b-48af-a172-2030320e2193",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("13"),
|
||||
TEST_FIELD to IntegerValue(1013),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with later cursor and later extracted_at than incoming record. Upsert should preserve this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"70243c59-eadb-4840-90fa-be4ed57609fc",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("14"),
|
||||
TEST_FIELD to IntegerValue(14),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with equal cursor and later extracted_at than incoming record. Upsert should preserve this record."
|
||||
),
|
||||
),
|
||||
inputRecord(
|
||||
"966e89ec-c0d2-4358-b8e5-bf9c713f5396",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1,
|
||||
ID_FIELD to StringValue("15"),
|
||||
TEST_FIELD to IntegerValue(15),
|
||||
DESCRIPTION_FIELD to
|
||||
StringValue(
|
||||
"Existing record with earlier cursor and later extracted_at than existing record. Upsert should discard this record."
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
val UPSERT_EXPECTED_RECORDS: List<Map<String, Any>> =
|
||||
listOf(
|
||||
outputRecord(
|
||||
"109d38b9-e001-4f62-86ce-4a457ab013a1",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
generationId = 1L,
|
||||
ID_FIELD to "0",
|
||||
TEST_FIELD to 1000L,
|
||||
DESCRIPTION_FIELD to
|
||||
"New record, no existing record. Upsert should insert this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"6317026e-12f9-4713-976e-ce43901bd7ce",
|
||||
"2025-01-22T00:00:00Z",
|
||||
@@ -301,22 +545,108 @@ object TableOperationsFixtures {
|
||||
generationId = 1L,
|
||||
ID_FIELD to "1",
|
||||
TEST_FIELD to 1L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Existing record, no incoming record. Upsert should preserve this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"5499cdef-1411-4c7e-987c-b22fe1284a49",
|
||||
"2ddf5ee9-08a1-4319-824d-187d878edac5",
|
||||
"2025-01-23T01:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "6",
|
||||
TEST_FIELD to 6L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with no existing record. Upsert should insert this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"e56fc753-b55a-439b-9b16-528596e2ca3a",
|
||||
"2025-01-23T01:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "7",
|
||||
TEST_FIELD to 7L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with no existing record. Upsert should insert this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"8086bdd6-6cf5-479e-a819-e5f347373804",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "8",
|
||||
TEST_FIELD to 1008L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Existing record with later cursor and earlier extracted_at than incoming record. Upsert should preserve this record (prefer cursor over extracted_at).",
|
||||
),
|
||||
outputRecord(
|
||||
"f74b8ddb-45d0-4e30-af25-66885e57a0e6",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "2",
|
||||
TEST_FIELD to 1001L,
|
||||
ID_FIELD to "9",
|
||||
TEST_FIELD to 9L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with equal cursor and later extracted_at than existing record. Upsert should update with this record (break ties with extracted_at).",
|
||||
),
|
||||
outputRecord(
|
||||
"9110dcf0-2171-4daa-a934-695163950d98",
|
||||
"877cceb6-23a6-4e7b-92e3-59ca46f8fd6c",
|
||||
"2025-01-23T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "4",
|
||||
TEST_FIELD to 4L,
|
||||
ID_FIELD to "10",
|
||||
TEST_FIELD to 1010L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with later cursor and later extracted_at than existing record. Upsert should update with this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"3d345fb2-254e-4968-89a6-f896a05fb831",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "11",
|
||||
TEST_FIELD to 1011L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Existing record with later cursor and equal extracted_at than incoming record. Upsert should preserve this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"70fdf9b0-ade0-4d30-9131-ba217ef506da",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "12",
|
||||
TEST_FIELD to 1012L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with later cursor and equal extracted_at than existing record. Upsert should update with this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"739a9347-267b-48af-a172-2030320e2193",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "13",
|
||||
TEST_FIELD to 1013L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Existing record with later cursor and later extracted_at than incoming record. Upsert should preserve this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"70243c59-eadb-4840-90fa-be4ed57609fc",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "14",
|
||||
TEST_FIELD to 14L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Existing record with equal cursor and later extracted_at than incoming record. Upsert should preserve this record.",
|
||||
),
|
||||
outputRecord(
|
||||
"373127a7-a40e-4e23-890b-1a52114686ee",
|
||||
"2025-01-21T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1L,
|
||||
ID_FIELD to "15",
|
||||
TEST_FIELD to 1015L,
|
||||
DESCRIPTION_FIELD to
|
||||
"Incoming record with later cursor and earlier extracted_at than existing record. Upsert should update with this record.",
|
||||
),
|
||||
)
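// Sketch of the precedence rule the upsert fixtures above encode (an assumption about the
// intended semantics, not the dedupe implementation itself): prefer the larger cursor, break
// ties with extracted_at, and treat a winning record with non-null _ab_cdc_deleted_at as a
// deletion of the row.
data class UpsertCandidateSketch<C : Comparable<C>, E : Comparable<E>>(
    val cursor: C,
    val extractedAt: E,
    val isDeletion: Boolean,
)

fun <C : Comparable<C>, E : Comparable<E>> pickWinnerSketch(
    existing: UpsertCandidateSketch<C, E>,
    incoming: UpsertCandidateSketch<C, E>,
): UpsertCandidateSketch<C, E>? {
    val winner =
        when {
            incoming.cursor > existing.cursor -> incoming
            incoming.cursor < existing.cursor -> existing
            // Equal cursors: break the tie with extracted_at.
            incoming.extractedAt >= existing.extractedAt -> incoming
            else -> existing
        }
    // A winning deletion record removes the row entirely.
    return if (winner.isDeletion) null else winner
}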
|
||||
|
||||
@@ -343,10 +673,10 @@ object TableOperationsFixtures {
|
||||
}
|
||||
|
||||
// Create common destination stream configurations
|
||||
fun createAppendStream(
|
||||
fun createStream(
|
||||
namespace: String,
|
||||
name: String,
|
||||
schema: ObjectType,
|
||||
tableSchema: StreamTableSchema,
|
||||
generationId: Long = 1,
|
||||
minimumGenerationId: Long = 0,
|
||||
syncId: Long = 1,
|
||||
@@ -354,40 +684,23 @@ object TableOperationsFixtures {
|
||||
DestinationStream(
|
||||
unmappedNamespace = namespace,
|
||||
unmappedName = name,
|
||||
importType = Append,
|
||||
importType = tableSchema.importType,
|
||||
generationId = generationId,
|
||||
minimumGenerationId = minimumGenerationId,
|
||||
syncId = syncId,
|
||||
schema = schema,
|
||||
schema = ObjectType(LinkedHashMap(tableSchema.columnSchema.inputSchema)),
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
|
||||
fun createDedupeStream(
|
||||
namespace: String,
|
||||
name: String,
|
||||
schema: ObjectType,
|
||||
primaryKey: List<List<String>>,
|
||||
cursor: List<String>,
|
||||
generationId: Long = 1,
|
||||
minimumGenerationId: Long = 0,
|
||||
syncId: Long = 1,
|
||||
): DestinationStream =
|
||||
DestinationStream(
|
||||
unmappedNamespace = namespace,
|
||||
unmappedName = name,
|
||||
importType =
|
||||
Dedupe(
|
||||
primaryKey = primaryKey,
|
||||
cursor = cursor,
|
||||
),
|
||||
generationId = generationId,
|
||||
minimumGenerationId = minimumGenerationId,
|
||||
syncId = syncId,
|
||||
schema = schema,
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
)
|
||||
fun <V> List<Map<String, V>>.sortBy(key: String) =
|
||||
// The sketchy unchecked cast is intentional: we're assuming the tests are written such
// that the sort key is always comparable.
|
||||
// In practice, it's generally some sort of ID column (int/string/etc.).
|
||||
@Suppress("UNCHECKED_CAST") this.sortedBy { it[key] as Comparable<Any> }
|
||||
|
||||
fun <V> List<Map<String, V>>.sortByTestField() = this.sortedBy { it["test"] as Long }
|
||||
fun <V> Map<String, V>.prettyString() =
|
||||
"{" + this.entries.sortedBy { it.key }.joinToString(", ") + "}"
|
||||
|
||||
fun <V> List<Map<String, V>>.applyColumnNameMapping(mapping: ColumnNameMapping) =
|
||||
map { record ->
|
||||
@@ -398,7 +711,7 @@ object TableOperationsFixtures {
|
||||
airbyteMetaColumnMapping: Map<String, String>
|
||||
): List<Map<String, V>> {
|
||||
val totalMapping = ColumnNameMapping(columnNameMapping + airbyteMetaColumnMapping)
|
||||
return map { record -> record.mapKeys { (k, _) -> totalMapping.originalName(k) ?: k } }
|
||||
return map { record -> record.mapKeys { (k, _) -> totalMapping.invert()[k] ?: k } }
|
||||
}
|
||||
|
||||
fun <V> List<Map<String, V>>.removeNulls() =
|
||||
@@ -431,6 +744,15 @@ object TableOperationsFixtures {
|
||||
*pairs,
|
||||
)
|
||||
|
||||
fun inputRecord(vararg pairs: Pair<String, AirbyteValue>) =
|
||||
inputRecord(
|
||||
rawId = UUID.randomUUID().toString(),
|
||||
extractedAt = "2025-01-23T00:00:00Z",
|
||||
meta = linkedMapOf(),
|
||||
generationId = 1,
|
||||
pairs = pairs,
|
||||
)
|
||||
|
||||
fun outputRecord(
|
||||
rawId: String,
|
||||
extractedAt: String,
|
||||
@@ -445,4 +767,16 @@ object TableOperationsFixtures {
|
||||
COLUMN_NAME_AB_GENERATION_ID to generationId,
|
||||
*pairs,
|
||||
)
|
||||
|
||||
fun assertEquals(
|
||||
expectedRecords: List<Map<String, Any?>>,
|
||||
actualRecords: List<Map<String, Any?>>,
|
||||
sortKey: String,
|
||||
message: String,
|
||||
) =
|
||||
Assertions.assertEquals(
|
||||
expectedRecords.sortBy(sortKey).joinToString("\n") { it.prettyString() },
|
||||
actualRecords.sortBy(sortKey).joinToString("\n") { it.prettyString() },
|
||||
message,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -4,10 +4,12 @@
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.sortByTestField
|
||||
import io.airbyte.cdk.load.data.AirbyteValue
|
||||
import io.airbyte.cdk.load.data.IntegerValue
|
||||
import io.airbyte.cdk.load.data.ObjectValue
|
||||
@@ -18,6 +20,7 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||
import kotlinx.coroutines.test.runTest
|
||||
@@ -48,6 +51,8 @@ interface TableOperationsSuite {
|
||||
/** The database client instance to test. Must be properly configured and connected. */
|
||||
val client: TableOperationsClient
|
||||
val testClient: TestTableOperationsClient
|
||||
val schemaFactory: TableSchemaFactory
|
||||
|
||||
// since ColumnNameMapping doesn't include the airbyte columns...
|
||||
val airbyteMetaColumnMapping: Map<String, String>
|
||||
get() = Meta.COLUMN_NAMES.associateWith { it }
|
||||
@@ -84,16 +89,19 @@ interface TableOperationsSuite {
|
||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||
harness.assertTableDoesNotExist(testTable)
|
||||
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
|
||||
try {
|
||||
|
||||
client.createTable(
|
||||
tableName = testTable,
|
||||
columnNameMapping = Fixtures.TEST_MAPPING,
|
||||
stream =
|
||||
Fixtures.createAppendStream(
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
||||
tableSchema = tableSchema,
|
||||
),
|
||||
replace = false,
|
||||
)
|
||||
@@ -129,11 +137,20 @@ interface TableOperationsSuite {
|
||||
val testTable = Fixtures.generateTestTableName("insert-test-table", testNamespace)
|
||||
harness.assertTableDoesNotExist(testTable)
|
||||
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val stream =
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
tableName = testTable,
|
||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = stream,
|
||||
)
|
||||
|
||||
testClient.insertRecords(testTable, inputRecords, columnNameMapping)
|
||||
@@ -142,7 +159,7 @@ interface TableOperationsSuite {
|
||||
|
||||
assertEquals(
|
||||
expectedRecords,
|
||||
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
||||
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping),
|
||||
)
|
||||
} finally {
|
||||
harness.cleanupTable(testTable)
|
||||
@@ -174,11 +191,20 @@ interface TableOperationsSuite {
|
||||
val testTable = Fixtures.generateTestTableName("count-test-table", testNamespace)
|
||||
harness.assertTableDoesNotExist(testTable)
|
||||
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val stream =
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
tableName = testTable,
|
||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = stream,
|
||||
)
|
||||
|
||||
val records1 =
|
||||
@@ -322,11 +348,20 @@ interface TableOperationsSuite {
|
||||
val testTable = Fixtures.generateTestTableName("gen-id-test-table", testNamespace)
|
||||
harness.assertTableDoesNotExist(testTable)
|
||||
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val stream =
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
tableName = testTable,
|
||||
schema = Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = stream,
|
||||
)
|
||||
|
||||
val genId = 17L
|
||||
@@ -382,18 +417,36 @@ interface TableOperationsSuite {
|
||||
harness.assertTableDoesNotExist(sourceTable)
|
||||
harness.assertTableDoesNotExist(targetTable)
|
||||
|
||||
val sourceTableSchema =
|
||||
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val sourceStream =
|
||||
Fixtures.createStream(
|
||||
namespace = sourceTable.namespace,
|
||||
name = sourceTable.name,
|
||||
tableSchema = sourceTableSchema,
|
||||
)
|
||||
|
||||
val targetTableSchema =
|
||||
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val targetStream =
|
||||
Fixtures.createStream(
|
||||
namespace = targetTable.namespace,
|
||||
name = targetTable.name,
|
||||
tableSchema = targetTableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
sourceTable,
|
||||
Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping,
|
||||
tableName = sourceTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = sourceStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||
|
||||
harness.createTestTableAndVerifyExists(
|
||||
targetTable,
|
||||
Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping,
|
||||
tableName = targetTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = targetStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
||||
|
||||
@@ -402,13 +455,14 @@ interface TableOperationsSuite {
|
||||
val overwrittenTableRecords = harness.readTableWithoutMetaColumns(targetTable)
|
||||
|
||||
assertEquals(
|
||||
expectedRecords.sortByTestField(),
|
||||
overwrittenTableRecords
|
||||
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
||||
.sortByTestField(),
|
||||
) {
|
||||
"Expected records were not in the overwritten table."
|
||||
}
|
||||
expectedRecords,
|
||||
overwrittenTableRecords.reverseColumnNameMapping(
|
||||
columnNameMapping,
|
||||
airbyteMetaColumnMapping,
|
||||
),
|
||||
"test",
|
||||
"Expected records were not in the overwritten table.",
|
||||
)
|
||||
|
||||
assert(!client.tableExists(sourceTable)) {
|
||||
"Source table: ${sourceTable.namespace}.${sourceTable.name} was not dropped as expected."
|
||||
@@ -453,18 +507,36 @@ interface TableOperationsSuite {
|
||||
harness.assertTableDoesNotExist(sourceTable)
|
||||
harness.assertTableDoesNotExist(targetTable)
|
||||
|
||||
val sourceTableSchema =
|
||||
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val sourceStream =
|
||||
Fixtures.createStream(
|
||||
namespace = sourceTable.namespace,
|
||||
name = sourceTable.name,
|
||||
tableSchema = sourceTableSchema,
|
||||
)
|
||||
|
||||
val targetTableSchema =
|
||||
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
|
||||
val targetStream =
|
||||
Fixtures.createStream(
|
||||
namespace = targetTable.namespace,
|
||||
name = targetTable.name,
|
||||
tableSchema = targetTableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
sourceTable,
|
||||
Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping,
|
||||
tableName = sourceTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = sourceStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||
|
||||
harness.createTestTableAndVerifyExists(
|
||||
targetTable,
|
||||
Fixtures.TEST_INTEGER_SCHEMA,
|
||||
columnNameMapping,
|
||||
tableName = targetTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
stream = targetStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
||||
|
||||
@@ -473,13 +545,14 @@ interface TableOperationsSuite {
|
||||
val copyTableRecords = harness.readTableWithoutMetaColumns(targetTable)
|
||||
|
||||
assertEquals(
|
||||
expectedRecords.sortByTestField(),
|
||||
copyTableRecords
|
||||
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
||||
.sortByTestField(),
|
||||
) {
|
||||
"Expected source records were not copied to the target table."
|
||||
}
|
||||
expectedRecords,
|
||||
copyTableRecords.reverseColumnNameMapping(
|
||||
columnNameMapping,
|
||||
airbyteMetaColumnMapping,
|
||||
),
|
||||
"test",
|
||||
"Expected source records were not copied to the target table.",
|
||||
)
|
||||
} finally {
|
||||
harness.cleanupTable(sourceTable)
|
||||
harness.cleanupTable(targetTable)
|
||||
@@ -518,31 +591,38 @@ interface TableOperationsSuite {
|
||||
|
||||
harness.assertTableDoesNotExist(sourceTable)
|
||||
|
||||
val sourceTableSchema =
|
||||
schemaFactory.make(sourceTable, Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties, Append)
|
||||
val sourceStream =
|
||||
Fixtures.createAppendStream(
|
||||
Fixtures.createStream(
|
||||
namespace = sourceTable.namespace,
|
||||
name = sourceTable.name,
|
||||
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
|
||||
tableSchema = sourceTableSchema,
|
||||
)
|
||||
|
||||
val targetTable = Fixtures.generateTestTableName("upsert-test-target-table", testNamespace)
|
||||
|
||||
harness.assertTableDoesNotExist(targetTable)
|
||||
|
||||
val targetTableSchema =
|
||||
schemaFactory.make(
|
||||
targetTable,
|
||||
Fixtures.TEST_INTEGER_SCHEMA.properties,
|
||||
Dedupe(
|
||||
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
|
||||
cursor = listOf(Fixtures.TEST_FIELD),
|
||||
),
|
||||
)
|
||||
val targetStream =
|
||||
Fixtures.createDedupeStream(
|
||||
Fixtures.createStream(
|
||||
namespace = targetTable.namespace,
|
||||
name = targetTable.name,
|
||||
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
|
||||
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
|
||||
cursor = listOf(Fixtures.TEST_FIELD),
|
||||
tableSchema = targetTableSchema,
|
||||
)
|
||||
|
||||
try {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
tableName = sourceTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
schema = Fixtures.ID_AND_TEST_SCHEMA,
|
||||
stream = sourceStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
|
||||
@@ -550,7 +630,6 @@ interface TableOperationsSuite {
|
||||
harness.createTestTableAndVerifyExists(
|
||||
tableName = targetTable,
|
||||
columnNameMapping = columnNameMapping,
|
||||
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
|
||||
stream = targetStream,
|
||||
)
|
||||
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
|
||||
@@ -560,13 +639,14 @@ interface TableOperationsSuite {
|
||||
val upsertTableRecords = testClient.readTable(targetTable)
|
||||
|
||||
assertEquals(
|
||||
expectedRecords.sortByTestField(),
|
||||
upsertTableRecords
|
||||
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
|
||||
.sortByTestField(),
|
||||
) {
|
||||
"Upserted table did not contain expected records."
|
||||
}
|
||||
expectedRecords,
|
||||
upsertTableRecords.reverseColumnNameMapping(
|
||||
columnNameMapping,
|
||||
airbyteMetaColumnMapping,
|
||||
),
|
||||
"id",
|
||||
"Upserted table did not contain expected records.",
|
||||
)
|
||||
} finally {
|
||||
harness.cleanupTable(sourceTable)
|
||||
harness.cleanupTable(targetTable)
|
||||
|
||||
@@ -5,12 +5,10 @@
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.createAppendStream
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
|
||||
import io.airbyte.cdk.load.data.AirbyteValue
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import org.junit.jupiter.api.Assertions.assertEquals
|
||||
|
||||
@@ -29,14 +27,8 @@ class TableOperationsTestHarness(
|
||||
/** Creates a test table with the given configuration and verifies it was created. */
|
||||
suspend fun createTestTableAndVerifyExists(
|
||||
tableName: TableName,
|
||||
schema: ObjectType,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
stream: DestinationStream =
|
||||
createAppendStream(
|
||||
namespace = tableName.namespace,
|
||||
name = tableName.name,
|
||||
schema = schema,
|
||||
)
|
||||
stream: DestinationStream
|
||||
) {
|
||||
client.createTable(
|
||||
stream = stream,
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
|
||||
import io.airbyte.cdk.load.data.BooleanValue
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.IntegerValue
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.data.ObjectValue
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.data.StringValue
|
||||
import io.airbyte.cdk.load.data.UnknownType
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
|
||||
object TableSchemaEvolutionFixtures {
|
||||
val ID_AND_STRING_SCHEMA =
|
||||
ObjectType(
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, true),
|
||||
"test" to FieldType(StringType, true),
|
||||
),
|
||||
)
|
||||
val ID_AND_UNKNOWN_SCHEMA =
|
||||
ObjectType(
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, true),
|
||||
"test" to FieldType(UnknownType(Jsons.readTree("""{"type": "potato"}""")), true),
|
||||
),
|
||||
)
|
||||
|
||||
val STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS =
|
||||
listOf(
|
||||
inputRecord("id" to IntegerValue(1), "test" to StringValue("\"foo\"")),
|
||||
inputRecord("id" to IntegerValue(2), "test" to StringValue("""{"foo": "bar"}""")),
|
||||
inputRecord("id" to IntegerValue(3), "test" to StringValue("true")),
|
||||
inputRecord("id" to IntegerValue(4), "test" to StringValue("0")),
|
||||
inputRecord("id" to IntegerValue(5), "test" to StringValue("foo")),
|
||||
)
|
||||
val STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS =
|
||||
listOf(
|
||||
mapOf("id" to 1L, "test" to "\"foo\""),
|
||||
mapOf("id" to 2L, "test" to """{"foo": "bar"}"""),
|
||||
mapOf("id" to 3L, "test" to "true"),
|
||||
mapOf("id" to 4L, "test" to "0"),
|
||||
mapOf("id" to 5L, "test" to "foo"),
|
||||
)
|
||||
|
||||
val UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS =
|
||||
listOf(
|
||||
inputRecord("id" to IntegerValue(1), "test" to StringValue("foo")),
|
||||
inputRecord(
|
||||
"id" to IntegerValue(2),
|
||||
"test" to ObjectValue(linkedMapOf("foo" to StringValue("bar")))
|
||||
),
|
||||
inputRecord("id" to IntegerValue(3), "test" to BooleanValue(true)),
|
||||
inputRecord("id" to IntegerValue(4), "test" to IntegerValue(0)),
|
||||
)
|
||||
val UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS =
|
||||
listOf(
|
||||
mapOf("id" to 1L, "test" to "foo"),
|
||||
mapOf("id" to 2L, "test" to """{"foo":"bar"}"""),
|
||||
mapOf("id" to 3L, "test" to "true"),
|
||||
mapOf("id" to 4L, "test" to "0"),
|
||||
)
|
||||
|
||||
val APPLY_CHANGESET_INITIAL_COLUMN_MAPPING =
|
||||
ColumnNameMapping(
|
||||
mapOf(
|
||||
"id" to "id",
|
||||
"updated_at" to "updated_at",
|
||||
"to_retain" to "to_retain",
|
||||
"to_change" to "to_change",
|
||||
"to_drop" to "to_drop",
|
||||
)
|
||||
)
|
||||
val APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING =
|
||||
ColumnNameMapping(
|
||||
mapOf(
|
||||
"id" to "id",
|
||||
"updated_at" to "updated_at",
|
||||
"to_retain" to "to_retain",
|
||||
"to_change" to "to_change",
|
||||
"to_add" to "to_add",
|
||||
)
|
||||
)
|
||||
val APPLY_CHANGESET_EXPECTED_EXTRACTED_AT = "2025-01-22T00:00:00Z"
|
||||
}
|
||||
@@ -4,31 +4,32 @@
|
||||
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.ImportType
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.ID_FIELD
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.TEST_FIELD
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.removeNulls
|
||||
import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
|
||||
import io.airbyte.cdk.load.data.AirbyteValue
|
||||
import io.airbyte.cdk.load.data.FieldType
|
||||
import io.airbyte.cdk.load.data.IntegerType
|
||||
import io.airbyte.cdk.load.data.IntegerValue
|
||||
import io.airbyte.cdk.load.data.ObjectType
|
||||
import io.airbyte.cdk.load.data.ObjectValue
|
||||
import io.airbyte.cdk.load.data.StringType
|
||||
import io.airbyte.cdk.load.data.StringValue
|
||||
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
|
||||
import io.airbyte.cdk.load.message.Meta
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
|
||||
import io.airbyte.cdk.load.schema.TableSchemaFactory
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
|
||||
import kotlin.test.assertEquals
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.junit.jupiter.api.Assertions
|
||||
import org.junit.jupiter.api.Assertions.assertTrue
|
||||
import org.junit.jupiter.api.assertAll
|
||||
|
||||
@@ -40,6 +41,7 @@ interface TableSchemaEvolutionSuite {
|
||||
|
||||
val opsClient: TableOperationsClient
|
||||
val testClient: TestTableOperationsClient
|
||||
val schemaFactory: TableSchemaFactory
|
||||
|
||||
private val harness: TableOperationsTestHarness
|
||||
get() = TableOperationsTestHarness(opsClient, testClient, airbyteMetaColumnMapping)
|
||||
@@ -61,11 +63,13 @@ interface TableSchemaEvolutionSuite {
|
||||
) = runTest {
|
||||
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
|
||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
|
||||
val stream =
|
||||
Fixtures.createAppendStream(
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = Fixtures.ALL_TYPES_SCHEMA,
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
|
||||
opsClient.createNamespace(testNamespace)
|
||||
@@ -97,11 +101,13 @@ interface TableSchemaEvolutionSuite {
|
||||
) {
|
||||
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
|
||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||
val tableSchema =
|
||||
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
|
||||
val stream =
|
||||
Fixtures.createAppendStream(
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = Fixtures.ALL_TYPES_SCHEMA,
|
||||
tableSchema = tableSchema,
|
||||
)
|
||||
val computedSchema = client.computeSchema(stream, columnNameMapping)
|
||||
assertEquals(expectedComputedSchema, computedSchema)
|
||||
@@ -309,58 +315,96 @@ interface TableSchemaEvolutionSuite {
|
||||
)
|
||||
}
|
||||
|
||||
fun `basic apply changeset`() {
|
||||
`basic apply changeset`(
|
||||
initialColumnNameMapping =
|
||||
ColumnNameMapping(
|
||||
mapOf(
|
||||
"to_retain" to "to_retain",
|
||||
"to_change" to "to_change",
|
||||
"to_drop" to "to_drop",
|
||||
)
|
||||
),
|
||||
modifiedColumnNameMapping =
|
||||
ColumnNameMapping(
|
||||
mapOf(
|
||||
"to_retain" to "to_retain",
|
||||
"to_change" to "to_change",
|
||||
"to_add" to "to_add",
|
||||
)
|
||||
),
|
||||
fun `apply changeset - handle sync mode append`() {
|
||||
`apply changeset`(Append, Append)
|
||||
}
|
||||
|
||||
fun `apply changeset - handle changing sync mode from append to dedup`() {
|
||||
`apply changeset`(Append, Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()))
|
||||
}
|
||||
|
||||
fun `apply changeset - handle changing sync mode from dedup to append`() {
|
||||
`apply changeset`(Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()), Append)
|
||||
}
|
||||
|
||||
fun `apply changeset - handle sync mode dedup`() {
|
||||
`apply changeset`(
|
||||
Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()),
|
||||
Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList())
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a basic set of schema changes. We're not changing the sync mode, the types are just
|
||||
* Execute a basic set of schema changes, across a variety of sync modes. The types are just
|
||||
* string/int (i.e. no JSON), and there's no funky characters anywhere.
|
||||
*
|
||||
* You should not directly annotate this function with `@Test`. Instead:
|
||||
* 1. If you need to modify any of the parameters, override this function (if the defaults
|
||||
* ```
|
||||
* work correctly, you can skip this step)
|
||||
* ```
|
||||
* 2. Annotate `@Test` onto [`apply changeset - append-append`], [`apply changeset -
|
||||
* append-dedup`], etc.
|
||||
*/
|
||||
fun `basic apply changeset`(
|
||||
fun `apply changeset`(
|
||||
initialStreamImportType: ImportType,
|
||||
modifiedStreamImportType: ImportType,
|
||||
) {
|
||||
`apply changeset`(
|
||||
TableSchemaEvolutionFixtures.APPLY_CHANGESET_INITIAL_COLUMN_MAPPING,
|
||||
TableSchemaEvolutionFixtures.APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING,
|
||||
// If your destination reads back timestamps in a nonstandard format, you can override
|
||||
// this value to match that format.
|
||||
TableSchemaEvolutionFixtures.APPLY_CHANGESET_EXPECTED_EXTRACTED_AT,
|
||||
initialStreamImportType,
|
||||
modifiedStreamImportType,
|
||||
)
|
||||
}
|
||||
|
||||
fun `apply changeset`(
|
||||
initialColumnNameMapping: ColumnNameMapping,
|
||||
modifiedColumnNameMapping: ColumnNameMapping
|
||||
modifiedColumnNameMapping: ColumnNameMapping,
|
||||
expectedExtractedAt: String,
|
||||
initialStreamImportType: ImportType,
|
||||
modifiedStreamImportType: ImportType,
|
||||
) = runTest {
|
||||
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
|
||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||
val initialSchema =
|
||||
ObjectType(
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, true),
|
||||
"updated_at" to FieldType(IntegerType, true),
|
||||
"to_retain" to FieldType(StringType, true),
|
||||
"to_change" to FieldType(IntegerType, true),
|
||||
"to_drop" to FieldType(StringType, true),
|
||||
),
|
||||
)
|
||||
val initialTableSchema =
|
||||
schemaFactory.make(testTable, initialSchema.properties, initialStreamImportType)
|
||||
val initialStream =
|
||||
Fixtures.createStream(
|
||||
testTable.namespace,
|
||||
testTable.name,
|
||||
initialTableSchema,
|
||||
)
|
||||
val modifiedSchema =
|
||||
ObjectType(
|
||||
linkedMapOf(
|
||||
"id" to FieldType(IntegerType, true),
|
||||
"updated_at" to FieldType(IntegerType, true),
|
||||
"to_retain" to FieldType(StringType, true),
|
||||
"to_change" to FieldType(StringType, true),
|
||||
"to_add" to FieldType(StringType, true),
|
||||
),
|
||||
)
|
||||
val modifiedTableSchema =
|
||||
schemaFactory.make(testTable, modifiedSchema.properties, modifiedStreamImportType)
|
||||
val modifiedStream =
|
||||
Fixtures.createAppendStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = modifiedSchema,
|
||||
Fixtures.createStream(
|
||||
testTable.namespace,
|
||||
testTable.name,
|
||||
modifiedTableSchema,
|
||||
)
|
||||
|
||||
// Create the table and compute the schema changeset
|
||||
@@ -371,16 +415,20 @@ interface TableSchemaEvolutionSuite {
|
||||
initialColumnNameMapping,
|
||||
modifiedSchema,
|
||||
modifiedColumnNameMapping,
|
||||
initialStream,
|
||||
modifiedStream,
|
||||
)
|
||||
// Insert a record before applying the changeset
|
||||
testClient.insertRecords(
|
||||
testTable,
|
||||
initialColumnNameMapping,
|
||||
mapOf(
|
||||
COLUMN_NAME_AB_RAW_ID to StringValue("fcc784dd-bf06-468e-ad59-666d5aaceae8"),
|
||||
COLUMN_NAME_AB_EXTRACTED_AT to TimestampWithTimezoneValue("2025-01-22T00:00:00Z"),
|
||||
COLUMN_NAME_AB_META to ObjectValue(linkedMapOf()),
|
||||
COLUMN_NAME_AB_GENERATION_ID to IntegerValue(1),
|
||||
inputRecord(
|
||||
"fcc784dd-bf06-468e-ad59-666d5aaceae8",
|
||||
"2025-01-22T00:00:00Z",
|
||||
linkedMapOf(),
|
||||
1,
|
||||
"id" to IntegerValue(1234),
|
||||
"updated_at" to IntegerValue(5678),
|
||||
"to_retain" to StringValue("to_retain original value"),
|
||||
"to_change" to IntegerValue(42),
|
||||
"to_drop" to StringValue("to_drop original value"),
|
||||
@@ -395,22 +443,33 @@ interface TableSchemaEvolutionSuite {
|
||||
changeset,
|
||||
)
|
||||
|
||||
val postAlterationRecords = harness.readTableWithoutMetaColumns(testTable)
|
||||
Assertions.assertEquals(
|
||||
// Many destinations fully recreate the table when changing the sync mode,
|
||||
// so don't use harness.readTableWithoutMetaColumns.
|
||||
// We need to assert that the meta columns were preserved.
|
||||
val postAlterationRecords =
|
||||
testClient
|
||||
.readTable(testTable)
|
||||
.removeNulls()
|
||||
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping)
|
||||
assertEquals(
|
||||
listOf(
|
||||
mapOf(
|
||||
"_airbyte_raw_id" to "fcc784dd-bf06-468e-ad59-666d5aaceae8",
|
||||
"_airbyte_extracted_at" to expectedExtractedAt,
|
||||
"_airbyte_meta" to linkedMapOf<String, Any?>(),
|
||||
"_airbyte_generation_id" to 1L,
|
||||
"id" to 1234L,
|
||||
"updated_at" to 5678L,
|
||||
"to_retain" to "to_retain original value",
|
||||
// changed from int to string
|
||||
"to_change" to "42",
|
||||
// note the lack of `to_add` - new columns should be initialized to null
|
||||
)
|
||||
),
|
||||
postAlterationRecords
|
||||
.removeNulls()
|
||||
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping),
|
||||
) {
|
||||
postAlterationRecords,
|
||||
"id",
|
||||
"Expected records were not in the overwritten table."
|
||||
}
|
||||
)
|
||||
|
||||
val postAlterationDiscoveredSchema = client.discoverSchema(testTable)
|
||||
val postAlterationChangeset =
|
||||
@@ -421,6 +480,68 @@ interface TableSchemaEvolutionSuite {
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we can alter a column from StringType to UnknownType. In many destinations, this
|
||||
* poses some challenges (e.g. naively casting VARCHAR to JSON may not work as expected).
|
||||
*
|
||||
* See also [`change from unknown type to string type`].
|
||||
*/
|
||||
fun `change from string type to unknown type`() {
|
||||
`change from string type to unknown type`(
|
||||
Fixtures.ID_AND_TEST_MAPPING,
|
||||
Fixtures.ID_AND_TEST_MAPPING,
|
||||
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS,
|
||||
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS,
|
||||
)
|
||||
}
|
||||
|
||||
fun `change from string type to unknown type`(
|
||||
initialColumnNameMapping: ColumnNameMapping,
|
||||
modifiedColumnNameMapping: ColumnNameMapping,
|
||||
inputRecords: List<Map<String, AirbyteValue>>,
|
||||
expectedRecords: List<Map<String, Any?>>,
|
||||
) =
|
||||
executeAndVerifySchemaEvolution(
|
||||
TableSchemaEvolutionFixtures.ID_AND_STRING_SCHEMA,
|
||||
initialColumnNameMapping,
|
||||
TableSchemaEvolutionFixtures.ID_AND_UNKNOWN_SCHEMA,
|
||||
modifiedColumnNameMapping,
|
||||
inputRecords,
|
||||
expectedRecords,
|
||||
)
|
||||
|
||||
/**
|
||||
* Test that we can alter a column from UnknownType to StringType. In many destinations, this
|
||||
* poses some challenges (e.g. naively casting JSON to VARCHAR may not work as expected).
|
||||
*
|
||||
* See also [`change from string type to unknown type`].
|
||||
*/
|
||||
fun `change from unknown type to string type`() {
|
||||
`change from string type to unknown type`(
|
||||
Fixtures.ID_AND_TEST_MAPPING,
|
||||
Fixtures.ID_AND_TEST_MAPPING,
|
||||
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS,
|
||||
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS,
|
||||
)
|
||||
}
|
||||
|
||||
fun `change from unknown type to string type`(
|
||||
initialColumnNameMapping: ColumnNameMapping,
|
||||
modifiedColumnNameMapping: ColumnNameMapping,
|
||||
inputRecords: List<Map<String, AirbyteValue>>,
|
||||
expectedRecords: List<Map<String, Any?>>,
|
||||
) =
|
||||
executeAndVerifySchemaEvolution(
|
||||
TableSchemaEvolutionFixtures.ID_AND_UNKNOWN_SCHEMA,
|
||||
initialColumnNameMapping,
|
||||
TableSchemaEvolutionFixtures.ID_AND_STRING_SCHEMA,
|
||||
modifiedColumnNameMapping,
|
||||
inputRecords,
|
||||
expectedRecords,
|
||||
)
|
||||
|
||||
// TODO add tests for funky chars (add/drop/change type; funky chars in PK/cursor)
|
||||
|
||||
/**
|
||||
* Utility method for a typical schema evolution test. Creates a table with [initialSchema]
|
||||
* using [initialColumnNameMapping], then computes the column changeset using [modifiedSchema]
|
||||
@@ -434,20 +555,19 @@ interface TableSchemaEvolutionSuite {
|
||||
initialColumnNameMapping: ColumnNameMapping,
|
||||
modifiedSchema: ObjectType,
|
||||
modifiedColumnNameMapping: ColumnNameMapping,
|
||||
initialStream: DestinationStream =
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
tableSchema = schemaFactory.make(testTable, initialSchema.properties, Append),
|
||||
),
|
||||
modifiedStream: DestinationStream =
|
||||
Fixtures.createStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
tableSchema = schemaFactory.make(testTable, modifiedSchema.properties, Append),
|
||||
),
|
||||
): SchemaEvolutionComputation {
|
||||
val initialStream =
|
||||
Fixtures.createAppendStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = initialSchema,
|
||||
)
|
||||
val modifiedStream =
|
||||
Fixtures.createAppendStream(
|
||||
namespace = testTable.namespace,
|
||||
name = testTable.name,
|
||||
schema = modifiedSchema,
|
||||
)
|
||||
|
||||
opsClient.createNamespace(testTable.namespace)
|
||||
opsClient.createTable(
|
||||
tableName = testTable,
|
||||
@@ -463,6 +583,57 @@ interface TableSchemaEvolutionSuite {
|
||||
actualSchema,
|
||||
expectedSchema,
|
||||
columnChangeset,
|
||||
modifiedStream,
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a table using [initialSchema]; insert [inputRecords] to the table; execute a schema
|
||||
* evolution to [modifiedSchema]; read back the table and verify that it contains
|
||||
* [expectedRecords].
|
||||
*
|
||||
* By convention: the schemas should use the column name `id` to identify records.
|
||||
*/
|
||||
private fun executeAndVerifySchemaEvolution(
|
||||
initialSchema: ObjectType,
|
||||
initialColumnNameMapping: ColumnNameMapping,
|
||||
modifiedSchema: ObjectType,
|
||||
modifiedColumnNameMapping: ColumnNameMapping,
|
||||
inputRecords: List<Map<String, AirbyteValue>>,
|
||||
expectedRecords: List<Map<String, Any?>>,
|
||||
) = runTest {
|
||||
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
|
||||
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
|
||||
|
||||
// Create the table and compute the schema changeset
|
||||
val (_, expectedSchema, changeset, modifiedStream) =
|
||||
computeSchemaEvolution(
|
||||
testTable,
|
||||
initialSchema,
|
||||
initialColumnNameMapping,
|
||||
modifiedSchema,
|
||||
modifiedColumnNameMapping,
|
||||
)
|
||||
|
||||
testClient.insertRecords(testTable, inputRecords, initialColumnNameMapping)
|
||||
|
||||
client.applyChangeset(
|
||||
modifiedStream,
|
||||
modifiedColumnNameMapping,
|
||||
testTable,
|
||||
expectedSchema.columns,
|
||||
changeset,
|
||||
)
|
||||
|
||||
val postAlterationRecords =
|
||||
harness
|
||||
.readTableWithoutMetaColumns(testTable)
|
||||
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping)
|
||||
assertEquals(
|
||||
expectedRecords,
|
||||
postAlterationRecords,
|
||||
"id",
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
@@ -470,5 +641,6 @@ interface TableSchemaEvolutionSuite {
|
||||
val discoveredSchema: TableSchema,
|
||||
val computedSchema: TableSchema,
|
||||
val columnChangeset: ColumnChangeset,
|
||||
val modifiedStream: DestinationStream,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
package io.airbyte.cdk.load.component
|
||||
|
||||
import io.airbyte.cdk.load.data.AirbyteValue
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
|
||||
interface TestTableOperationsClient {
|
||||
/** Tests database connectivity. */
|
||||
|
||||
@@ -51,6 +51,10 @@ import io.airbyte.cdk.load.message.Meta.Change
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.CHECKPOINT_ID_NAME
|
||||
import io.airbyte.cdk.load.message.Meta.Companion.CHECKPOINT_INDEX_NAME
|
||||
import io.airbyte.cdk.load.message.StreamCheckpoint
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.airbyte.cdk.load.state.CheckpointId
|
||||
import io.airbyte.cdk.load.state.CheckpointIndex
|
||||
import io.airbyte.cdk.load.state.CheckpointKey
|
||||
@@ -380,6 +384,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val messages =
|
||||
runSync(
|
||||
@@ -492,6 +497,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val messages =
|
||||
runSync(
|
||||
@@ -668,6 +674,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream2 =
|
||||
DestinationStream(
|
||||
@@ -679,6 +686,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream3 =
|
||||
DestinationStream(
|
||||
@@ -690,6 +698,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val messages =
|
||||
runSync(
|
||||
@@ -1025,6 +1034,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream2 =
|
||||
DestinationStream(
|
||||
@@ -1036,6 +1046,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream3 =
|
||||
DestinationStream(
|
||||
@@ -1047,6 +1058,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val messages =
|
||||
runSync(
|
||||
@@ -1528,7 +1540,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val messages =
|
||||
runSync(
|
||||
@@ -1652,7 +1665,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
syncId = 42,
|
||||
isFileBased = true,
|
||||
includeFiles = true,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
|
||||
val sourcePath = "path/to/file"
|
||||
@@ -1744,7 +1758,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stateMessage =
|
||||
runSyncUntilStateAckAndExpectFailure(
|
||||
@@ -1831,7 +1846,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream1 = makeStream(randomizedNamespace + "_1")
|
||||
val stream2 = makeStream(randomizedNamespace + "_2")
|
||||
@@ -1936,7 +1952,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
// Catalog with some weird schemas.
|
||||
// Every stream has an int `id`, and maybe some string fields.
|
||||
@@ -2066,7 +2083,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -2120,7 +2138,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId,
|
||||
minimumGenerationId,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream =
|
||||
makeStream(
|
||||
@@ -2252,7 +2271,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId,
|
||||
minimumGenerationId,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream =
|
||||
makeStream(
|
||||
@@ -2366,7 +2386,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 41,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 41,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
|
||||
InputRecord(
|
||||
@@ -2538,7 +2559,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 42,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
|
||||
InputRecord(
|
||||
@@ -2663,7 +2685,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 41,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 41,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
|
||||
InputRecord(
|
||||
@@ -2847,7 +2870,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream = makeStream(syncId = 42)
|
||||
runSync(
|
||||
@@ -2917,7 +2941,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream =
|
||||
makeStream(
|
||||
@@ -2997,6 +3022,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 0,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
|
||||
val stream1 =
|
||||
@@ -3075,7 +3101,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = generationId,
|
||||
minimumGenerationId = minimumGenerationId,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream =
|
||||
makeStream(
|
||||
@@ -3199,7 +3226,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val sync1Stream = makeStream(syncId = 42)
|
||||
fun makeRecord(data: String, extractedAt: Long) =
|
||||
@@ -3407,7 +3435,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -3479,7 +3508,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val stream1 = makeStream("cursor1")
|
||||
fun makeRecord(stream: DestinationStream, cursorName: String, emittedAtMs: Long) =
|
||||
@@ -3552,6 +3582,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeRecord(stream: DestinationStream, secondPk: String, emittedAtMs: Long) =
|
||||
InputRecord(
|
||||
@@ -3631,7 +3662,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 42,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
}
|
||||
val messages =
|
||||
@@ -3689,7 +3721,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeRecord(data: String) =
|
||||
InputRecord(
|
||||
@@ -4167,7 +4200,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
fun makeRecord(data: String) =
|
||||
InputRecord(
|
||||
@@ -4334,7 +4368,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -4504,7 +4539,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
|
||||
fun runSync() =
|
||||
@@ -4690,7 +4726,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -4924,6 +4961,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 0,
|
||||
syncId = 12,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -4985,7 +5023,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 42,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
runSync(
|
||||
updatedConfig,
|
||||
@@ -5055,7 +5094,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId = 0,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
assertDoesNotThrow { runSync(updatedConfig, stream, messages = emptyList()) }
|
||||
dumpAndDiffRecords(
|
||||
@@ -5079,7 +5119,8 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
generationId,
|
||||
minimumGenerationId,
|
||||
syncId,
|
||||
namespaceMapper = namespaceMapperForMedium()
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
val firstStream = makeStream(generationId = 12, minimumGenerationId = 0, syncId = 42)
|
||||
runSync(
|
||||
@@ -5118,6 +5159,7 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
minimumGenerationId = 1,
|
||||
syncId = 42,
|
||||
namespaceMapper = namespaceMapperForMedium(),
|
||||
tableSchema = emptyTableSchema,
|
||||
)
|
||||
assertDoesNotThrow {
|
||||
runSync(
|
||||
@@ -5240,4 +5282,18 @@ abstract class BasicFunctionalityIntegrationTest(
|
||||
NamespaceMapper(namespaceDefinitionType = NamespaceDefinitionType.SOURCE)
|
||||
}
|
||||
}
|
||||
|
||||
// This will get blown away in the tests as the DestinationStream's we are mocking just get
|
||||
// converted to the protocol which has no concept of destination schemas
|
||||
protected val emptyTableSchema: StreamTableSchema =
|
||||
StreamTableSchema(
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -16,6 +16,10 @@ import io.airbyte.cdk.load.data.json.toAirbyteValue
|
||||
import io.airbyte.cdk.load.message.DestinationFile
|
||||
import io.airbyte.cdk.load.message.InputFile
|
||||
import io.airbyte.cdk.load.message.InputRecord
|
||||
import io.airbyte.cdk.load.schema.model.ColumnSchema
|
||||
import io.airbyte.cdk.load.schema.model.StreamTableSchema
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableNames
|
||||
import io.airbyte.cdk.load.state.CheckpointId
|
||||
import io.airbyte.cdk.load.test.util.destination_process.DestinationProcess
|
||||
import io.airbyte.cdk.load.util.CloseableCoroutine
|
||||
@@ -60,20 +64,34 @@ class SingleStreamInsert(
|
||||
primaryKey = listOf(listOf(idColumn.name)),
|
||||
cursor = listOf(idColumn.name),
|
||||
)
|
||||
val schema =
|
||||
val schemaFields =
|
||||
(listOf(idColumn) + columns).map {
|
||||
Pair(it.name, FieldType(type = it.type, nullable = true))
|
||||
}
|
||||
val streamSchema = ObjectType(linkedMapOf(*schemaFields.toTypedArray()))
|
||||
|
||||
DestinationStream(
|
||||
unmappedNamespace = randomizedNamespace,
|
||||
unmappedName = streamName,
|
||||
importType = importType,
|
||||
schema = ObjectType(linkedMapOf(*schema.toTypedArray())),
|
||||
schema = streamSchema,
|
||||
generationId = generationId,
|
||||
minimumGenerationId = minGenerationId,
|
||||
syncId = 1,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = streamSchema.properties,
|
||||
inputToFinalColumnNames =
|
||||
streamSchema.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = importType,
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@@ -177,16 +195,29 @@ class SingleStreamFileTransfer(
|
||||
) : PerformanceTestScenario {
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
private val streamSchema = ObjectType(linkedMapOf())
|
||||
private val stream =
|
||||
DestinationStream(
|
||||
unmappedNamespace = randomizedNamespace,
|
||||
unmappedName = streamName,
|
||||
importType = Append,
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
schema = streamSchema,
|
||||
generationId = 1,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 1,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
override val catalog: DestinationCatalog =
|
||||
@@ -201,6 +232,20 @@ class SingleStreamFileTransfer(
|
||||
minimumGenerationId = 1,
|
||||
syncId = 101,
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(
|
||||
finalTableName = TableName(randomizedNamespace, streamName)
|
||||
),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -258,17 +303,30 @@ class SingleStreamFileAndMetadataTransfer(
|
||||
) : PerformanceTestScenario {
|
||||
private val log = KotlinLogging.logger {}
|
||||
|
||||
private val streamSchema = ObjectType(linkedMapOf())
|
||||
private val stream =
|
||||
DestinationStream(
|
||||
unmappedNamespace = randomizedNamespace,
|
||||
unmappedName = streamName,
|
||||
importType = Append,
|
||||
schema = ObjectType(linkedMapOf()),
|
||||
schema = streamSchema,
|
||||
generationId = 1,
|
||||
minimumGenerationId = 0,
|
||||
syncId = 1,
|
||||
includeFiles = true,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
|
||||
override val catalog: DestinationCatalog =
|
||||
@@ -283,7 +341,21 @@ class SingleStreamFileAndMetadataTransfer(
|
||||
minimumGenerationId = 1,
|
||||
syncId = 101,
|
||||
includeFiles = true,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(
|
||||
finalTableName = TableName(randomizedNamespace, streamName)
|
||||
),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = mapOf(),
|
||||
inputToFinalColumnNames = mapOf(),
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = Append,
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -374,21 +446,36 @@ class MultiStreamInsert(
|
||||
|
||||
private val streams = run {
|
||||
val importType = Append
|
||||
val schema =
|
||||
val schemaFields =
|
||||
(listOf(idColumn) + columns).map {
|
||||
Pair(it.name, FieldType(type = it.type, nullable = true))
|
||||
}
|
||||
|
||||
(0 until numStreams).map {
|
||||
(0 until numStreams).map { index ->
|
||||
val streamSchema = ObjectType(linkedMapOf(*schemaFields.toTypedArray()))
|
||||
val streamName = "${streamNamePrefix}__$index"
|
||||
DestinationStream(
|
||||
unmappedNamespace = randomizedNamespace,
|
||||
unmappedName = "${streamNamePrefix}__$it",
|
||||
unmappedName = streamName,
|
||||
importType = importType,
|
||||
schema = ObjectType(linkedMapOf(*schema.toTypedArray())),
|
||||
schema = streamSchema,
|
||||
generationId = generationId,
|
||||
minimumGenerationId = minGenerationId,
|
||||
syncId = 1,
|
||||
namespaceMapper = NamespaceMapper()
|
||||
namespaceMapper = NamespaceMapper(),
|
||||
tableSchema =
|
||||
StreamTableSchema(
|
||||
tableNames =
|
||||
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
|
||||
columnSchema =
|
||||
ColumnSchema(
|
||||
inputSchema = streamSchema.properties,
|
||||
inputToFinalColumnNames =
|
||||
streamSchema.properties.keys.associateWith { it },
|
||||
finalSchema = mapOf(),
|
||||
),
|
||||
importType = importType,
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.write
|
||||
|
||||
import io.airbyte.cdk.command.ConfigurationSpecification
|
||||
import io.airbyte.cdk.load.test.util.FakeDataDumper
|
||||
import io.airbyte.cdk.load.test.util.IntegrationTest
|
||||
import io.airbyte.cdk.load.test.util.NoopDestinationCleaner
|
||||
import io.airbyte.cdk.load.test.util.NoopExpectedRecordMapper
|
||||
import jakarta.inject.Inject
|
||||
import org.junit.jupiter.api.Assertions.assertNotNull
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
|
||||
* Validates write operation can initialize with real catalog loading.
|
||||
*
|
||||
* Tests that all beans required for catalog processing exist:
|
||||
* - RawTableNameGenerator, FinalTableNameGenerator, ColumnNameGenerator
|
||||
* - ColumnNameMapper, TableCatalog factory dependencies
|
||||
*
|
||||
* Complements ConnectorWiringSuite:
|
||||
* - ConnectorWiringSuite: Fast component test, validates write path
|
||||
* - WriteInitializationTest: Integration test, validates catalog loading
|
||||
*
|
||||
* Usage: class MyWriteInitTest : WriteInitializationTest<MySpecification>(
|
||||
* ```
|
||||
* configContents = File("secrets/config.json").readText(),
|
||||
* configSpecClass = MySpecification::class.java,
|
||||
* ```
|
||||
* )
|
||||
*
|
||||
* Troubleshooting:
|
||||
* - DI errors = missing bean (add to BeanFactory or mark @Singleton)
|
||||
* - File not found = create secrets/config.json with valid credentials
|
||||
*/
|
||||
abstract class WriteInitializationTest<T : ConfigurationSpecification>(
|
||||
val configContents: String,
|
||||
val configSpecClass: Class<T>,
|
||||
additionalMicronautEnvs: List<String> = emptyList(),
|
||||
) :
|
||||
IntegrationTest(
|
||||
additionalMicronautEnvs = additionalMicronautEnvs,
|
||||
dataDumper = FakeDataDumper,
|
||||
destinationCleaner = NoopDestinationCleaner,
|
||||
recordMangler = NoopExpectedRecordMapper,
|
||||
) {
|
||||
|
||||
@Inject lateinit var writer: DestinationWriter
|
||||
|
||||
/**
|
||||
* Validates all beans for catalog loading exist.
|
||||
*
|
||||
* Creates write process with real catalog to ensure:
|
||||
* - DestinationCatalog can be created from catalog JSON
|
||||
* - TableCatalog factory can create catalog with name generators
|
||||
* - DestinationWriter can be instantiated
|
||||
*
|
||||
* DI errors here = missing beans (same errors that would crash Docker runtime).
|
||||
*/
|
||||
@Test
|
||||
fun `writer can be instantiated with real catalog`() {
|
||||
// Create minimal catalog for testing (with all required fields)
|
||||
val catalog =
|
||||
io.airbyte.protocol.models.v0
|
||||
.ConfiguredAirbyteCatalog()
|
||||
.withStreams(
|
||||
listOf(
|
||||
io.airbyte.protocol.models.v0
|
||||
.ConfiguredAirbyteStream()
|
||||
.withStream(
|
||||
io.airbyte.protocol.models.v0
|
||||
.AirbyteStream()
|
||||
.withName("write_init_test")
|
||||
.withNamespace("test")
|
||||
.withJsonSchema(
|
||||
com.fasterxml.jackson.databind.node.JsonNodeFactory.instance
|
||||
.objectNode()
|
||||
.put("type", "object")
|
||||
.set(
|
||||
"properties",
|
||||
com.fasterxml.jackson.databind.node.JsonNodeFactory
|
||||
.instance
|
||||
.objectNode()
|
||||
.set(
|
||||
"id",
|
||||
com.fasterxml.jackson.databind.node
|
||||
.JsonNodeFactory
|
||||
.instance
|
||||
.objectNode()
|
||||
.put("type", "integer")
|
||||
)
|
||||
)
|
||||
)
|
||||
)
|
||||
.withSyncMode(io.airbyte.protocol.models.v0.SyncMode.FULL_REFRESH)
|
||||
.withDestinationSyncMode(
|
||||
io.airbyte.protocol.models.v0.DestinationSyncMode.APPEND
|
||||
)
|
||||
.withGenerationId(0L)
|
||||
.withMinimumGenerationId(0L)
|
||||
.withSyncId(42L)
|
||||
)
|
||||
)
|
||||
|
||||
// Just CREATE the process - DI will fail if beans are missing
|
||||
// We don't actually RUN it (that would hang waiting for stdin)
|
||||
try {
|
||||
val process =
|
||||
destinationProcessFactory.createDestinationProcess(
|
||||
command = "write",
|
||||
configContents = configContents,
|
||||
catalog = catalog,
|
||||
)
|
||||
|
||||
// If we get here, DI succeeded!
|
||||
// Process was created without bean instantiation errors
|
||||
assertNotNull(
|
||||
process,
|
||||
"Write process should be created successfully. " +
|
||||
"DI initialization passed - all required beans exist."
|
||||
)
|
||||
} catch (e: Exception) {
|
||||
// Check if it's a DI error (blocker) vs other error
|
||||
val message = e.message ?: ""
|
||||
val cause = e.cause?.message ?: ""
|
||||
|
||||
if (
|
||||
message.contains("BeanInstantiationException") ||
|
||||
message.contains("Failed to inject") ||
|
||||
message.contains("No bean of type") ||
|
||||
cause.contains("BeanInstantiationException") ||
|
||||
cause.contains("Failed to inject") ||
|
||||
cause.contains("No bean of type")
|
||||
) {
|
||||
throw AssertionError(
|
||||
"Write operation failed to initialize due to DI error. " +
|
||||
"This means required beans are missing. " +
|
||||
"Check for: RawTableNameGenerator, FinalTableNameGenerator, " +
|
||||
"ColumnNameGenerator, ColumnNameMapper, Writer, " +
|
||||
"AggregatePublishingConfig. " +
|
||||
"Original error: $message",
|
||||
e
|
||||
)
|
||||
}
|
||||
// Re-throw other unexpected errors
|
||||
throw e
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -280,8 +280,9 @@ class CdcPartitionReader<T : Comparable<T>>(
|
||||
|
||||
val event = DebeziumEvent(changeEvent)
|
||||
val eventType: EventType = emitRecord(event)
|
||||
// Update counters.
|
||||
updateCounters(event, eventType)
|
||||
if (!engineShuttingDown.get()) {
|
||||
updateCounters(event, eventType)
|
||||
}
|
||||
// Look for reasons to close down the engine.
|
||||
val closeReason: CloseReason = findCloseReason(event, eventType) ?: return
|
||||
// At this point, if we haven't returned already, we want to close down the engine.
|
||||
@@ -341,6 +342,7 @@ class CdcPartitionReader<T : Comparable<T>>(
|
||||
true ->
|
||||
runBlocking(Dispatchers.IO) {
|
||||
recordAcceptor.invoke(deserializedRecord.data, deserializedRecord.changes)
|
||||
updateCounters(event, EventType.RECORD_EMITTED)
|
||||
}
|
||||
// While the engine is running normally, we can emit records synchronously for
|
||||
// better performance.
|
||||
|
||||
@@ -5,6 +5,8 @@ import com.fasterxml.jackson.databind.JsonNode
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode
|
||||
import io.airbyte.cdk.command.JdbcSourceConfiguration
|
||||
import io.airbyte.cdk.command.OpaqueStateValue
|
||||
import io.airbyte.cdk.output.DataChannelMedium.SOCKET
|
||||
import io.airbyte.cdk.output.DataChannelMedium.STDIO
|
||||
import io.airbyte.cdk.output.sockets.toJson
|
||||
import io.airbyte.cdk.util.Jsons
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
@@ -39,7 +41,14 @@ abstract class JdbcPartitionsCreator<
|
||||
override suspend fun run() {}
|
||||
|
||||
override fun checkpoint(): PartitionReadCheckpoint =
|
||||
PartitionReadCheckpoint(partition.completeState, 0)
|
||||
PartitionReadCheckpoint(
|
||||
partition.completeState,
|
||||
0,
|
||||
when (streamState.streamFeedBootstrap.dataChannelMedium) {
|
||||
SOCKET -> generatePartitionId(4)
|
||||
STDIO -> null
|
||||
}
|
||||
)
|
||||
|
||||
override fun releaseResources() {}
|
||||
}
|
||||
|
||||
@@ -6,8 +6,8 @@ package io.airbyte.cdk.load.orchestration.db.direct_load_table
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.Sql
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
|
||||
interface DirectLoadSqlGenerator {
|
||||
fun createTable(
|
||||
|
||||
@@ -2,19 +2,26 @@
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.direct_load_table
|
||||
package io.airbyte.cdk.load.direct_load_table
|
||||
|
||||
import io.airbyte.cdk.SystemErrorException
|
||||
import io.airbyte.cdk.load.command.Append
|
||||
import io.airbyte.cdk.load.command.Dedupe
|
||||
import io.airbyte.cdk.load.command.DestinationCatalog
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.command.Overwrite
|
||||
import io.airbyte.cdk.load.component.TableOperationsClient
|
||||
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
|
||||
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
|
||||
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
|
||||
import io.airbyte.cdk.load.table.TempTableNameGenerator
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendTruncateStreamLoader
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupTruncateStreamLoader
|
||||
import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
|
||||
import io.airbyte.cdk.load.write.DestinationWriter
|
||||
import io.airbyte.cdk.load.write.StreamLoader
|
||||
import io.airbyte.cdk.load.write.StreamStateStore
|
||||
@@ -26,7 +33,7 @@ import io.airbyte.cdk.load.write.StreamStateStore
|
||||
*/
|
||||
class DirectLoadTableWriter(
|
||||
private val internalNamespace: String,
|
||||
private val names: TableCatalog,
|
||||
private val names: DestinationCatalog,
|
||||
private val stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>,
|
||||
private val destinationHandler: DatabaseHandler,
|
||||
private val schemaEvolutionClient: TableSchemaEvolutionClient,
|
||||
@@ -36,19 +43,18 @@ class DirectLoadTableWriter(
|
||||
) : DestinationWriter {
|
||||
private lateinit var initialStatuses: Map<DestinationStream, DirectLoadInitialStatus>
|
||||
override suspend fun setup() {
|
||||
val namespaces =
|
||||
names.values.map { (tableNames, _) -> tableNames.finalTableName!!.namespace }.toSet()
|
||||
val namespaces = names.streams.map { it.tableSchema.tableNames.finalTableName!!.namespace }
|
||||
destinationHandler.createNamespaces(namespaces + listOf(internalNamespace))
|
||||
|
||||
initialStatuses = stateGatherer.gatherInitialStatus(names)
|
||||
initialStatuses = stateGatherer.gatherInitialStatus()
|
||||
}
|
||||
|
||||
override fun createStreamLoader(stream: DestinationStream): StreamLoader {
|
||||
val initialStatus = initialStatuses[stream]!!
|
||||
val tableNameInfo = names[stream]!!
|
||||
val realTableName = tableNameInfo.tableNames.finalTableName!!
|
||||
val tempTableName = tempTableNameGenerator.generate(realTableName)
|
||||
val columnNameMapping = tableNameInfo.columnNameMapping
|
||||
val realTableName = stream.tableSchema.tableNames.finalTableName!!
|
||||
val tempTableName = stream.tableSchema.tableNames.tempTableName!!
|
||||
val columnNameMapping =
|
||||
ColumnNameMapping(stream.tableSchema.columnSchema.inputToFinalColumnNames)
|
||||
return when (stream.minimumGenerationId) {
|
||||
0L ->
|
||||
when (stream.importType) {
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
data class AlterTableReport(
|
||||
val columnsToAdd: Set<String>,
|
||||
val columnsToRemove: Set<String>,
|
||||
val columnsToChangeType: Set<String>,
|
||||
) {
|
||||
/**
|
||||
* A no-op for an AlterTableReport is when the existing table matches the expected schema
|
||||
*
|
||||
* @return whether the schema matches
|
||||
*/
|
||||
val isNoOp =
|
||||
columnsToAdd.isEmpty() && columnsToRemove.isEmpty() && columnsToChangeType.isEmpty()
|
||||
}
|
||||
@@ -1,260 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationCatalog
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
|
||||
import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
|
||||
import io.airbyte.cdk.load.orchestration.db.RawTableNameGenerator
|
||||
import io.airbyte.cdk.load.orchestration.db.TableNames
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import io.micronaut.context.annotation.Factory
|
||||
import javax.inject.Singleton
|
||||
import org.apache.commons.codec.digest.DigestUtils
|
||||
|
||||
private val LOGGER = KotlinLogging.logger {}
|
||||
const val DEFAULT_AIRBYTE_INTERNAL_NAMESPACE = "airbyte_internal"
|
||||
|
||||
data class TableNameInfo(val tableNames: TableNames, val columnNameMapping: ColumnNameMapping)
|
||||
|
||||
data class TableCatalog(private val catalog: Map<DestinationStream, TableNameInfo>) :
|
||||
Map<DestinationStream, TableNameInfo> by catalog {
|
||||
fun getMappedColumnName(stream: DestinationStream, colName: String): String? =
|
||||
this[stream]?.columnNameMapping?.get(colName)
|
||||
}
|
||||
|
||||
data class TableCatalogByDescriptor(
|
||||
private val catalog: Map<DestinationStream.Descriptor, TableNameInfo>
|
||||
) : Map<DestinationStream.Descriptor, TableNameInfo> by catalog {
|
||||
fun getFinalTableName(desc: DestinationStream.Descriptor): TableName? =
|
||||
this[desc]?.tableNames?.finalTableName
|
||||
}
|
||||
|
||||
@Factory
|
||||
class TableCatalogFactory {
|
||||
@Singleton
|
||||
fun getTableCatalog(
|
||||
catalog: DestinationCatalog,
|
||||
// Raw table generator is optional. Direct-load destinations don't need it
|
||||
// (unless they were previously T+D destinations, in which case it's still required
|
||||
// so that we maintain stable names with the T+D version)
|
||||
rawTableNameGenerator: RawTableNameGenerator?,
|
||||
finalTableNameGenerator: FinalTableNameGenerator,
|
||||
finalTableColumnNameGenerator: ColumnNameGenerator,
|
||||
): TableCatalog {
|
||||
val processedRawTableNames =
|
||||
if (rawTableNameGenerator != null) {
|
||||
mutableSetOf<TableName>()
|
||||
} else {
|
||||
null
|
||||
}
|
||||
val processedFinalTableNames = mutableSetOf<TableName>()
|
||||
|
||||
val result = mutableMapOf<DestinationStream, TableNameInfo>()
|
||||
|
||||
catalog.streams.forEach { stream ->
|
||||
val originalRawTableName = rawTableNameGenerator?.getTableName(stream.mappedDescriptor)
|
||||
val originalFinalTableName =
|
||||
finalTableNameGenerator.getTableName(stream.mappedDescriptor)
|
||||
val currentRawProcessedName: TableName?
|
||||
val currentFinalProcessedName: TableName
|
||||
|
||||
val rawTableNameColliding =
|
||||
processedRawTableNames?.let { originalRawTableName in it } ?: false
|
||||
val finalTableNameColliding = originalFinalTableName in processedFinalTableNames
|
||||
if (rawTableNameColliding || finalTableNameColliding) {
|
||||
LOGGER.info {
|
||||
"Detected table name collision for ${stream.mappedDescriptor.namespace}.${stream.mappedDescriptor.name}"
|
||||
}
|
||||
// Create a hash-suffixed name to avoid collision
|
||||
val hash =
|
||||
DigestUtils.sha1Hex(
|
||||
"${originalFinalTableName.namespace}&airbyte&${stream.mappedDescriptor.name}"
|
||||
)
|
||||
.substring(0, 3)
|
||||
val newName = "${stream.mappedDescriptor.name}_$hash"
|
||||
|
||||
currentRawProcessedName =
|
||||
rawTableNameGenerator?.getTableName(
|
||||
stream.mappedDescriptor.copy(name = newName)
|
||||
)
|
||||
processedRawTableNames?.add(currentRawProcessedName!!)
|
||||
currentFinalProcessedName =
|
||||
finalTableNameGenerator.getTableName(
|
||||
stream.mappedDescriptor.copy(name = newName)
|
||||
)
|
||||
processedFinalTableNames.add(currentFinalProcessedName)
|
||||
} else {
|
||||
processedRawTableNames?.add(originalRawTableName!!)
|
||||
processedFinalTableNames.add(originalFinalTableName)
|
||||
currentRawProcessedName = originalRawTableName
|
||||
currentFinalProcessedName = originalFinalTableName
|
||||
}
|
||||
|
||||
// Create column name mapping with collision handling
|
||||
val columnNameMapping = createColumnNameMapping(stream, finalTableColumnNameGenerator)
|
||||
|
||||
result[stream] =
|
||||
TableNameInfo(
|
||||
TableNames(
|
||||
rawTableName = currentRawProcessedName,
|
||||
finalTableName = currentFinalProcessedName,
|
||||
),
|
||||
columnNameMapping
|
||||
)
|
||||
}
|
||||
|
||||
return TableCatalog(result)
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates column name mapping with handling for potential collisions using incremental
|
||||
* numbering, with advanced resolution for truncation cases.
|
||||
*/
|
||||
private fun createColumnNameMapping(
|
||||
stream: DestinationStream,
|
||||
finalTableColumnNameGenerator: ColumnNameGenerator,
|
||||
): ColumnNameMapping {
|
||||
val processedColumnNames = mutableSetOf<ColumnNameGenerator.ColumnName>()
|
||||
val columnMappings = mutableMapOf<String, String>()
|
||||
// Map to track original column names by their truncated versions
|
||||
|
||||
stream.schema.asColumns().forEach { (columnName, _) ->
|
||||
val processedColumnName = finalTableColumnNameGenerator.getColumnName(columnName)
|
||||
|
||||
// Get a unique column name by adding incremental numbers if necessary
|
||||
val finalColumnName =
|
||||
resolveColumnNameCollision(
|
||||
stream,
|
||||
processedColumnName,
|
||||
existingNames = processedColumnNames,
|
||||
originalColumnName = columnName,
|
||||
finalTableColumnNameGenerator,
|
||||
)
|
||||
|
||||
processedColumnNames.add(finalColumnName)
|
||||
columnMappings[columnName] = finalColumnName.displayName
|
||||
}
|
||||
|
||||
return ColumnNameMapping(columnMappings)
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves column name collisions by first trying incremental suffixes (_1, _2, etc.) If that
|
||||
* doesn't work due to name truncation, uses the more powerful superResolveColumnCollisions.
|
||||
*
|
||||
* @param processedName The name after initial processing by the column name generator
|
||||
* @param existingNames Set of names already used for other columns
|
||||
* @param originalColumnName The original column name before processing
|
||||
*/
|
||||
private fun resolveColumnNameCollision(
|
||||
stream: DestinationStream,
|
||||
processedName: ColumnNameGenerator.ColumnName,
|
||||
existingNames: Set<ColumnNameGenerator.ColumnName>,
|
||||
originalColumnName: String,
|
||||
finalTableColumnNameGenerator: ColumnNameGenerator,
|
||||
): ColumnNameGenerator.ColumnName {
|
||||
// If processed name is unique, use it
|
||||
if (!existingNames.hasConflict(processedName)) {
|
||||
return processedName
|
||||
}
|
||||
|
||||
LOGGER.info {
|
||||
"Detected column name collision for ${stream.mappedDescriptor.namespace}.${stream.mappedDescriptor.name}.$originalColumnName"
|
||||
}
|
||||
|
||||
// Try adding incremental suffixes until we find a non-colliding name
|
||||
var counter = 1
|
||||
var candidateName: ColumnNameGenerator.ColumnName
|
||||
var previousCandidate = processedName
|
||||
|
||||
do {
|
||||
// Generate candidate name by adding numeric suffix
|
||||
candidateName =
|
||||
finalTableColumnNameGenerator.getColumnName("${originalColumnName}_$counter")
|
||||
|
||||
// Check if we're making progress (detecting potential truncation)
|
||||
if (candidateName.canonicalName == previousCandidate.canonicalName) {
|
||||
// We're not making progress, likely due to name truncation
|
||||
// Use the more powerful resolution method with the ORIGINAL column name
|
||||
return superResolveColumnCollisions(
|
||||
originalColumnName,
|
||||
existingNames,
|
||||
processedName.canonicalName.length,
|
||||
finalTableColumnNameGenerator,
|
||||
)
|
||||
}
|
||||
|
||||
previousCandidate = candidateName
|
||||
counter++
|
||||
} while (existingNames.hasConflict(candidateName))
|
||||
|
||||
return candidateName
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a name of the format `<prefix><length><suffix>` when simple suffix-based conflict
|
||||
* resolution fails due to name truncation. E.g. for affixLength=3: "veryLongName" -> "ver6ame"
|
||||
*
|
||||
* @param originalName The original column name that caused collision
|
||||
* @param existingNames Set of existing column names to avoid collision with
|
||||
* @param maximumColumnNameLength The maximum allowed length for the column name
|
||||
*/
|
||||
private fun superResolveColumnCollisions(
|
||||
originalName: String,
|
||||
existingNames: Set<ColumnNameGenerator.ColumnName>,
|
||||
maximumColumnNameLength: Int,
|
||||
finalTableColumnNameGenerator: ColumnNameGenerator,
|
||||
): ColumnNameGenerator.ColumnName {
|
||||
// Assume that the <length> portion can be expressed in at most 5 characters.
|
||||
// If someone is giving us a column name that's longer than 99999 characters,
|
||||
// that's just being silly.
|
||||
val affixLength = (maximumColumnNameLength - 5) / 2
|
||||
|
||||
// If, after reserving 5 characters for the length, we can't fit the affixes,
|
||||
// just give up. That means the destination is trying to restrict us to a
|
||||
// 6-character column name, which is just silly.
|
||||
if (affixLength <= 0) {
|
||||
throw IllegalArgumentException(
|
||||
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing."
|
||||
)
|
||||
}
|
||||
|
||||
val prefix = originalName.substring(0, affixLength)
|
||||
val suffix = originalName.substring(originalName.length - affixLength, originalName.length)
|
||||
|
||||
val length = originalName.length - 2 * affixLength
|
||||
val newColumnName = finalTableColumnNameGenerator.getColumnName("$prefix$length$suffix")
|
||||
|
||||
// If there's still a collision after this, just give up.
|
||||
// We could try to be more clever, but this is already a pretty rare case.
|
||||
if (existingNames.hasConflict(newColumnName)) {
|
||||
throw IllegalArgumentException(
|
||||
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing."
|
||||
)
|
||||
}
|
||||
|
||||
return newColumnName
|
||||
}
|
||||
|
||||
@Singleton
|
||||
fun getTableCatalogByDescriptor(map: TableCatalog): TableCatalogByDescriptor {
|
||||
return TableCatalogByDescriptor(map.mapKeys { (k, _) -> k.mappedDescriptor })
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* can't just use `.contains()`, because we don't care whether the column names have the same
|
||||
* display name. We only care about the canonical name.
|
||||
*
|
||||
* (arguably we could override equals/hashcode? But that would make writing tests more difficult,
|
||||
* because it's not an intuitive behavior)
|
||||
*/
|
||||
private fun Collection<ColumnNameGenerator.ColumnName>.hasConflict(
|
||||
candidate: ColumnNameGenerator.ColumnName
|
||||
) = this.any { it.canonicalName == candidate.canonicalName }
|
||||
@@ -1,54 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatus
|
||||
import java.time.Instant
|
||||
|
||||
data class TypingDedupingDatabaseInitialStatus(
|
||||
/** Initial status of the final table, or null if the table doesn't exist yet. */
|
||||
val finalTableStatus: FinalTableInitialStatus?,
|
||||
val rawTableStatus: RawTableInitialStatus?,
|
||||
val tempRawTableStatus: RawTableInitialStatus?,
|
||||
) : DatabaseInitialStatus
|
||||
|
||||
data class FinalTableInitialStatus(
|
||||
val isSchemaMismatch: Boolean,
|
||||
val isEmpty: Boolean,
|
||||
/** The generation ID of _any_ record from the final table, or `null` if the table is empty. */
|
||||
val finalTableGenerationId: Long?,
|
||||
)
|
||||
|
||||
data class RawTableInitialStatus(
|
||||
/**
|
||||
* Whether there were any records with null `_airbyte_loaded_at`, at the time that this status
|
||||
* was fetched.
|
||||
*/
|
||||
val hasUnprocessedRecords: Boolean,
|
||||
/**
|
||||
* The highest timestamp such that all records in `SELECT * FROM raw_table WHERE
|
||||
* _airbyte_extracted_at <= ?` have a nonnull `_airbyte_loaded_at`.
|
||||
*
|
||||
* Destinations MAY use this value to only run T+D on records with `_airbyte_extracted_at > ?`
|
||||
* (note the strictly-greater comparison).
|
||||
*/
|
||||
val maxProcessedTimestamp: Instant?,
|
||||
) {
|
||||
companion object {
|
||||
/**
|
||||
* If the raw table doesn't exist, we'll obviously need to create it. After creating a raw
|
||||
* table, this is its default state (i.e. it has no records, so there are by definition no
|
||||
* unprocessed records, and no processed records).
|
||||
*/
|
||||
val emptyTableStatus = RawTableInitialStatus(false, maxProcessedTimestamp = null)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Many callers need to do a `create table if not exists`. This is a utility method to update the
|
||||
* initial status accordingly - i.e. if the table already existed, retain its status; otherwise, use
|
||||
* the empty table status.
|
||||
*/
|
||||
fun RawTableInitialStatus?.reify() = this ?: RawTableInitialStatus.emptyTableStatus
|
||||
@@ -1,9 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
data class TypingDedupingExecutionConfig(
|
||||
val rawTableSuffix: String,
|
||||
)
|
||||
@@ -1,136 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
|
||||
import io.airbyte.cdk.load.orchestration.db.TableNames
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.table.TableSuffixes.SOFT_RESET_SUFFIX
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import java.time.Instant
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
class TypingDedupingFinalTableOperations(
|
||||
private val sqlGenerator: TypingDedupingSqlGenerator,
|
||||
private val databaseHandler: DatabaseHandler,
|
||||
) {
|
||||
fun createFinalTable(
|
||||
stream: DestinationStream,
|
||||
finalTableName: TableName,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
finalTableSuffix: String,
|
||||
replace: Boolean
|
||||
) {
|
||||
logger.info {
|
||||
"Creating final table for stream ${stream.mappedDescriptor.toPrettyString()} with name ${finalTableName.toPrettyString()}"
|
||||
}
|
||||
databaseHandler.execute(
|
||||
sqlGenerator.createFinalTable(
|
||||
stream,
|
||||
finalTableName,
|
||||
columnNameMapping,
|
||||
finalTableSuffix,
|
||||
replace = replace
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/** Reset the final table using a temp table or ALTER existing table's columns. */
|
||||
fun softResetFinalTable(
|
||||
stream: DestinationStream,
|
||||
tableNames: TableNames,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
) {
|
||||
logger.info {
|
||||
"Executing soft reset for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()}"
|
||||
}
|
||||
databaseHandler.execute(
|
||||
sqlGenerator.prepareTablesForSoftReset(stream, tableNames, columnNameMapping)
|
||||
)
|
||||
typeAndDedupe(
|
||||
stream,
|
||||
tableNames,
|
||||
columnNameMapping,
|
||||
maxProcessedTimestamp = null,
|
||||
finalTableSuffix = SOFT_RESET_SUFFIX,
|
||||
)
|
||||
databaseHandler.execute(
|
||||
sqlGenerator.overwriteFinalTable(
|
||||
stream,
|
||||
tableNames.finalTableName!!,
|
||||
finalTableSuffix = SOFT_RESET_SUFFIX
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to atomically swap the final table from the temp version. This could be destination
|
||||
* specific, INSERT INTO..SELECT * and DROP TABLE OR CREATE OR REPLACE ... SELECT *, DROP TABLE
|
||||
*/
|
||||
fun overwriteFinalTable(
|
||||
stream: DestinationStream,
|
||||
finalTableName: TableName,
|
||||
finalTableSuffix: String,
|
||||
) {
|
||||
logger.info {
|
||||
"Overwriting final table for stream ${stream.mappedDescriptor.toPrettyString()} with name ${finalTableName.toPrettyString()} using temp table with suffix $finalTableSuffix"
|
||||
}
|
||||
databaseHandler.execute(
|
||||
sqlGenerator.overwriteFinalTable(
|
||||
stream,
|
||||
finalTableName,
|
||||
finalTableSuffix = finalTableSuffix
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
fun typeAndDedupe(
|
||||
stream: DestinationStream,
|
||||
tableNames: TableNames,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
maxProcessedTimestamp: Instant?,
|
||||
finalTableSuffix: String
|
||||
) {
|
||||
try {
|
||||
logger.info {
|
||||
"Attempting typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix"
|
||||
}
|
||||
val unsafeSql =
|
||||
sqlGenerator.updateFinalTable(
|
||||
stream,
|
||||
tableNames,
|
||||
columnNameMapping,
|
||||
finalTableSuffix = finalTableSuffix,
|
||||
maxProcessedTimestamp = maxProcessedTimestamp,
|
||||
useExpensiveSaferCasting = false,
|
||||
)
|
||||
databaseHandler.execute(unsafeSql)
|
||||
} catch (e: Exception) {
|
||||
if (sqlGenerator.supportsExpensiveSaferCasting) {
|
||||
logger.info(e) {
|
||||
"Encountered Exception on unsafe SQL for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix, re-attempting with error handling"
|
||||
}
|
||||
val saferSql =
|
||||
sqlGenerator.updateFinalTable(
|
||||
stream,
|
||||
tableNames,
|
||||
columnNameMapping,
|
||||
finalTableSuffix = finalTableSuffix,
|
||||
maxProcessedTimestamp = maxProcessedTimestamp,
|
||||
useExpensiveSaferCasting = true,
|
||||
)
|
||||
databaseHandler.execute(saferSql)
|
||||
} else {
|
||||
logger.info(e) {
|
||||
"Encountered Exception on unsafe SQL for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix, not retrying"
|
||||
}
|
||||
throw e
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
|
||||
interface TypingDedupingRawTableOperations {
|
||||
/**
|
||||
* Prepare the raw table, including any associated blob storage. Similar to [createFinalTable],
|
||||
* accepts a [suffix] parameter, which should be used in conjunction with [overwriteRawTable].
|
||||
*
|
||||
* @param replace If true, then replace existing resources with empty e.g. tables. If false,
|
||||
* then leave existing resources untouched.
|
||||
*/
|
||||
fun prepareRawTable(rawTableName: TableName, suffix: String, replace: Boolean = false)
|
||||
|
||||
/**
|
||||
* Swap the "temporary" raw table into the "real" raw table. For example, `DROP TABLE IF NOT
|
||||
* EXISTS airbyte_internal.foo; ALTER TABLE airbyte_internal.foo_tmp RENAME TO foo`.
|
||||
*/
|
||||
fun overwriteRawTable(rawTableName: TableName, suffix: String)
|
||||
|
||||
/**
|
||||
* Copy all records from the temporary raw table into the real raw table, then drop the
|
||||
* temporary raw table. For example `INSERT INTO airbyte_internal.foo SELECT * FROM
|
||||
* airbyte_internal.foo_tmp; DROP TABLE airbyte_internal.foo_tmp`.
|
||||
*/
|
||||
fun transferFromTempRawTable(rawTableName: TableName, suffix: String)
|
||||
|
||||
/**
|
||||
* Get the generation of a single record in the raw table. Not necessarily the min or max
|
||||
* generation, just _any_ record.
|
||||
*
|
||||
* [TypingDedupingStreamLoader] is responsible for orchestrating the raw tables so that the temp
|
||||
* raw table always contains exactly one generation.
|
||||
*
|
||||
* @return The generation ID of a record in the raw table, or `null` if the raw table is empty.
|
||||
*/
|
||||
fun getRawTableGeneration(rawTableName: TableName, suffix: String): Long?
|
||||
}
|
||||
@@ -1,145 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.Sql
|
||||
import io.airbyte.cdk.load.orchestration.db.TableNames
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.table.TableSuffixes.SOFT_RESET_SUFFIX
|
||||
import java.time.Instant
|
||||
|
||||
interface TypingDedupingSqlGenerator {
|
||||
/**
|
||||
* Generate a SQL statement to create a fresh table to match the given stream.
|
||||
*
|
||||
* The generated SQL should throw an exception if the table already exists and `replace` is
|
||||
* false.
|
||||
*
|
||||
* @param finalTableSuffix A suffix to add to the stream name. Useful for full refresh overwrite
|
||||
* syncs, where we write the entire sync to a temp table.
|
||||
* @param replace If true, will overwrite an existing table. If false, will throw an exception
|
||||
* if the table already exists. If you're passing a non-empty prefix, you likely want to set
|
||||
* this to true.
|
||||
*/
|
||||
fun createFinalTable(
|
||||
stream: DestinationStream,
|
||||
tableName: TableName,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
finalTableSuffix: String,
|
||||
replace: Boolean
|
||||
): Sql
|
||||
|
||||
/**
|
||||
* Whether [updateFinalTable] actually generates different SQL when `useExpensiveSaferCasting`
|
||||
* is enabled. Some destinations don't have this distinction, and should override this field to
|
||||
* `false`.
|
||||
*/
|
||||
val supportsExpensiveSaferCasting: Boolean
|
||||
get() = true
|
||||
|
||||
/**
|
||||
* Generate a SQL statement to copy new data from the raw table into the final table.
|
||||
*
|
||||
* Responsible for:
|
||||
*
|
||||
* * Pulling new raw records from a table (i.e. records with null _airbyte_loaded_at)
|
||||
* * Extracting the JSON fields and casting to the appropriate types
|
||||
* * Handling errors in those casts
|
||||
* * Merging those typed records into an existing table
|
||||
* * Updating the raw records with SET _airbyte_loaded_at = now()
|
||||
*
|
||||
* Implementing classes are recommended to break this into smaller methods, which can be tested
|
||||
* in isolation. However, this interface only requires a single mega-method.
|
||||
*
|
||||
* @param finalTableSuffix the suffix of the final table to write to. If empty string, writes to
|
||||
* the final table directly. Useful for full refresh overwrite syncs, where we write the entire
|
||||
* sync to a temp table and then swap it into the final table at the end.
|
||||
*
|
||||
* @param minRawTimestamp The latest _airbyte_extracted_at for which all raw records with that
|
||||
* timestamp have already been typed+deduped. Implementations MAY use this value in a
|
||||
* `_airbyte_extracted_at > minRawTimestamp` filter on the raw table to improve query
|
||||
* performance.
|
||||
* @param useExpensiveSaferCasting often the data coming from the source can be faithfully
|
||||
* represented in the destination without issue, and using a "CAST" expression works fine,
|
||||
* however sometimes we get badly typed data. In these cases we can use a more expensive query
|
||||
* which handles casting exceptions.
|
||||
*/
|
||||
fun updateFinalTable(
|
||||
stream: DestinationStream,
|
||||
tableNames: TableNames,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
finalTableSuffix: String,
|
||||
maxProcessedTimestamp: Instant?,
|
||||
useExpensiveSaferCasting: Boolean,
|
||||
): Sql
|
||||
|
||||
/**
|
||||
* Drop the previous final table, and rename the new final table to match the old final table.
|
||||
*
|
||||
* This method may assume that the stream is an OVERWRITE stream, and that the final suffix is
|
||||
* non-empty. Callers are responsible for verifying those are true.
|
||||
*/
|
||||
fun overwriteFinalTable(
|
||||
stream: DestinationStream,
|
||||
finalTableName: TableName,
|
||||
finalTableSuffix: String,
|
||||
): Sql
|
||||
|
||||
fun clearLoadedAt(stream: DestinationStream, rawTableName: TableName): Sql
|
||||
|
||||
/** Typically we need to create a soft reset temporary table and clear loaded at values */
|
||||
fun prepareTablesForSoftReset(
|
||||
stream: DestinationStream,
|
||||
tableNames: TableNames,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
): Sql {
|
||||
val createTempTable =
|
||||
createFinalTable(
|
||||
stream,
|
||||
tableNames.finalTableName!!,
|
||||
columnNameMapping,
|
||||
SOFT_RESET_SUFFIX,
|
||||
replace = true
|
||||
)
|
||||
val clearLoadedAt = clearLoadedAt(stream, tableNames.rawTableName!!)
|
||||
return Sql.concat(createTempTable, clearLoadedAt)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* We are switching all destinations away from T+D, to use direct-load tables instead. However, some
|
||||
* destinations will continue to provide a "legacy raw tables" mode, which writes the raw table
|
||||
* format of T+D, but with the actual T+D disabled.
|
||||
*
|
||||
* This sqlgenerator supports that, by simply doing nothing.
|
||||
*/
|
||||
object NoopTypingDedupingSqlGenerator : TypingDedupingSqlGenerator {
|
||||
override fun createFinalTable(
|
||||
stream: DestinationStream,
|
||||
tableName: TableName,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
finalTableSuffix: String,
|
||||
replace: Boolean
|
||||
) = Sql.empty()
|
||||
|
||||
override fun updateFinalTable(
|
||||
stream: DestinationStream,
|
||||
tableNames: TableNames,
|
||||
columnNameMapping: ColumnNameMapping,
|
||||
finalTableSuffix: String,
|
||||
maxProcessedTimestamp: Instant?,
|
||||
useExpensiveSaferCasting: Boolean
|
||||
) = Sql.empty()
|
||||
|
||||
override fun overwriteFinalTable(
|
||||
stream: DestinationStream,
|
||||
finalTableName: TableName,
|
||||
finalTableSuffix: String
|
||||
) = Sql.empty()
|
||||
|
||||
override fun clearLoadedAt(stream: DestinationStream, rawTableName: TableName) = Sql.empty()
|
||||
}
|
||||
@@ -1,397 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.TableNames
|
||||
import io.airbyte.cdk.load.state.StreamProcessingFailed
|
||||
import io.airbyte.cdk.load.table.ColumnNameMapping
|
||||
import io.airbyte.cdk.load.table.TableSuffixes.NO_SUFFIX
|
||||
import io.airbyte.cdk.load.table.TableSuffixes.TMP_TABLE_SUFFIX
|
||||
import io.airbyte.cdk.load.write.StreamLoader
|
||||
import io.airbyte.cdk.load.write.StreamStateStore
|
||||
import io.github.oshai.kotlinlogging.KotlinLogging
|
||||
import java.time.Instant
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
class TypingDedupingStreamLoader(
|
||||
override val stream: DestinationStream,
|
||||
private val initialStatus: TypingDedupingDatabaseInitialStatus,
|
||||
private val tableNames: TableNames,
|
||||
private val columnNameMapping: ColumnNameMapping,
|
||||
private val rawTableOperations: TypingDedupingRawTableOperations,
|
||||
private val finalTableOperations: TypingDedupingFinalTableOperations,
|
||||
private val disableTypeDedupe: Boolean,
|
||||
private val streamStateStore: StreamStateStore<TypingDedupingExecutionConfig>,
|
||||
) : StreamLoader {
|
||||
private val isTruncateSync =
|
||||
when (stream.minimumGenerationId) {
|
||||
0L -> false
|
||||
stream.generationId -> true
|
||||
else -> {
|
||||
throw IllegalArgumentException("Hybrid refreshes are not yet supported.")
|
||||
}
|
||||
}
|
||||
private lateinit var rawTableSuffix: String
|
||||
private lateinit var finalTmpTableSuffix: String
|
||||
/**
|
||||
* The status of the raw table that "matters" for this sync. Specifically:
|
||||
* * For normal syncs / merge refreshes, this is the status of the real raw table)
|
||||
* * For truncate refreshes, this is the status of the temp raw table (because we never even
|
||||
* look at the real raw table)
|
||||
*/
|
||||
private lateinit var initialRawTableStatus: RawTableInitialStatus
|
||||
|
||||
override suspend fun start() {
|
||||
if (isTruncateSync) {
|
||||
val (rawTableStatus, suffix) = prepareStageForTruncate()
|
||||
initialRawTableStatus = rawTableStatus
|
||||
rawTableSuffix = suffix
|
||||
} else {
|
||||
rawTableSuffix = NO_SUFFIX
|
||||
initialRawTableStatus = prepareStageForNormalSync()
|
||||
}
|
||||
|
||||
if (!disableTypeDedupe) {
|
||||
// Prepare final tables based on sync mode.
|
||||
finalTmpTableSuffix = prepareFinalTable()
|
||||
} else {
|
||||
logger.info { "Typing and deduping disabled, skipping final table initialization" }
|
||||
finalTmpTableSuffix = NO_SUFFIX
|
||||
}
|
||||
|
||||
streamStateStore.put(
|
||||
stream.mappedDescriptor,
|
||||
TypingDedupingExecutionConfig(rawTableSuffix),
|
||||
)
|
||||
}
|
||||
|
||||
private fun prepareStageForTruncate(): Pair<RawTableInitialStatus, String> {
|
||||
/*
|
||||
tl;dr:
|
||||
* if a temp raw table exists, check whether it belongs to the correct generation.
|
||||
* if wrong generation, truncate it.
|
||||
* regardless, write into the temp raw table.
|
||||
* else, if a real raw table exists, check its generation.
|
||||
* if wrong generation, write into a new temp raw table.
|
||||
* else, write into the preexisting real raw table.
|
||||
* else, create a new temp raw table and write into it.
|
||||
*/
|
||||
if (initialStatus.tempRawTableStatus != null) {
|
||||
val tempStageGeneration =
|
||||
rawTableOperations.getRawTableGeneration(
|
||||
tableNames.rawTableName!!,
|
||||
TMP_TABLE_SUFFIX
|
||||
)
|
||||
if (tempStageGeneration == null || tempStageGeneration == stream.generationId) {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and existing temp raw table belongs to generation $tempStageGeneration (== current generation ${stream.generationId}). Retaining it."
|
||||
}
|
||||
// The temp table is from the correct generation. Set up any other resources
|
||||
// (staging file, etc.), but leave the table untouched.
|
||||
rawTableOperations.prepareRawTable(
|
||||
tableNames.rawTableName,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
return Pair(initialStatus.tempRawTableStatus.reify(), TMP_TABLE_SUFFIX)
|
||||
} else {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and existing temp raw table belongs to generation $tempStageGeneration (!= current generation ${stream.generationId}). Truncating it."
|
||||
}
|
||||
// The temp stage is from the wrong generation. Nuke it.
|
||||
rawTableOperations.prepareRawTable(
|
||||
tableNames.rawTableName,
|
||||
TMP_TABLE_SUFFIX,
|
||||
replace = true,
|
||||
)
|
||||
// We nuked the temp raw table, so create a new initial raw table status.
|
||||
return Pair(
|
||||
RawTableInitialStatus.emptyTableStatus,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
}
|
||||
} else if (initialStatus.rawTableStatus != null) {
|
||||
// It's possible to "resume" a truncate sync that was previously already finalized.
|
||||
// In this case, there is no existing temp raw table, and there is a real raw table
|
||||
// which already belongs to the correct generation.
|
||||
// Check for that case now.
|
||||
val realStageGeneration =
|
||||
rawTableOperations.getRawTableGeneration(tableNames.rawTableName!!, NO_SUFFIX)
|
||||
if (realStageGeneration == null || realStageGeneration == stream.generationId) {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, no existing temp raw table, and existing real raw table belongs to generation $realStageGeneration (== current generation ${stream.generationId}). Retaining it."
|
||||
}
|
||||
// The real raw table is from the correct generation. Set up any other resources
|
||||
// (staging file, etc.), but leave the table untouched.
|
||||
rawTableOperations.prepareRawTable(tableNames.rawTableName, NO_SUFFIX)
|
||||
return Pair(initialStatus.rawTableStatus.reify(), NO_SUFFIX)
|
||||
} else {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, existing real raw table belongs to generation $realStageGeneration (!= current generation ${stream.generationId}), and no preexisting temp raw table. Creating a temp raw table."
|
||||
}
|
||||
// We're initiating a new truncate refresh. Create a new temp stage.
|
||||
rawTableOperations.prepareRawTable(
|
||||
tableNames.rawTableName,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
return Pair(
|
||||
// Create a fresh raw table status, since we created a fresh temp stage.
|
||||
RawTableInitialStatus.emptyTableStatus,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
}
|
||||
} else {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and no preexisting temp or raw table. Creating a temp raw table."
|
||||
}
|
||||
// We're initiating a new truncate refresh. Create a new temp stage.
|
||||
rawTableOperations.prepareRawTable(
|
||||
tableNames.rawTableName!!,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
return Pair(
|
||||
// Create a fresh raw table status, since we created a fresh temp stage.
|
||||
RawTableInitialStatus.emptyTableStatus,
|
||||
TMP_TABLE_SUFFIX,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private fun prepareStageForNormalSync(): RawTableInitialStatus {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync. Creating raw table if not exists."
|
||||
}
|
||||
rawTableOperations.prepareRawTable(tableNames.rawTableName!!, NO_SUFFIX)
|
||||
if (initialStatus.tempRawTableStatus != null) {
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync, but temp raw table exists. Transferring it to real raw table."
|
||||
}
|
||||
// There was a previous truncate refresh attempt, which failed, and left some
|
||||
// records behind.
|
||||
// Retrieve those records and put them in the real stage.
|
||||
// This is necessary to avoid certain data loss scenarios.
|
||||
// (specifically: a user initiates a truncate sync, which fails, but emits some records.
|
||||
// It also emits a state message for "resumable" full refresh.
|
||||
// The user then initiates an incremental sync, which runs using that state.
|
||||
// In this case, we MUST retain the records from the truncate attempt.)
|
||||
rawTableOperations.transferFromTempRawTable(tableNames.rawTableName, TMP_TABLE_SUFFIX)
|
||||
|
||||
// We need to combine the raw table statuses from the real and temp raw tables.
|
||||
val hasUnprocessedRecords =
|
||||
initialStatus.tempRawTableStatus.hasUnprocessedRecords ||
|
||||
(initialStatus.rawTableStatus?.hasUnprocessedRecords ?: false)
|
||||
// Pick the earlier min timestamp.
|
||||
val maxProcessedTimestamp: Instant? =
|
||||
initialStatus.rawTableStatus?.maxProcessedTimestamp?.let { realRawTableTimestamp ->
|
||||
initialStatus.tempRawTableStatus.maxProcessedTimestamp?.let {
|
||||
tempRawTableTimestamp ->
|
||||
if (realRawTableTimestamp.isBefore(tempRawTableTimestamp)) {
|
||||
realRawTableTimestamp
|
||||
} else {
|
||||
tempRawTableTimestamp
|
||||
}
|
||||
}
|
||||
?: realRawTableTimestamp
|
||||
}
|
||||
?: initialStatus.tempRawTableStatus.maxProcessedTimestamp
|
||||
val updatedStatus =
|
||||
RawTableInitialStatus(
|
||||
hasUnprocessedRecords = hasUnprocessedRecords,
|
||||
maxProcessedTimestamp = maxProcessedTimestamp,
|
||||
)
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: After record transfer, initial raw table status is $updatedStatus."
|
||||
}
|
||||
return updatedStatus
|
||||
} else {
|
||||
val initialRawTableStatus = initialStatus.rawTableStatus.reify()
|
||||
logger.info {
|
||||
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync and no temp raw table. Initial raw table status is $initialRawTableStatus."
|
||||
}
|
||||
return initialRawTableStatus
|
||||
}
|
||||
}
|
||||
|
||||
private fun prepareFinalTable(): String {
|
||||
// No special handling if final table doesn't exist, just create and return
|
||||
if (initialStatus.finalTableStatus == null) {
|
||||
logger.info {
|
||||
"Final table does not exist for stream ${stream.mappedDescriptor.toPrettyString()}, creating ${tableNames.finalTableName!!.toPrettyString()}."
|
||||
}
|
||||
finalTableOperations.createFinalTable(
|
||||
stream,
|
||||
tableNames.finalTableName!!,
|
||||
columnNameMapping,
|
||||
NO_SUFFIX,
|
||||
replace = false
|
||||
)
|
||||
return NO_SUFFIX
|
||||
}
|
||||
|
||||
logger.info { "Final Table exists for stream ${stream.mappedDescriptor.toPrettyString()}" }
|
||||
// The table already exists. Decide whether we're writing to it directly, or
|
||||
// using a tmp table.
|
||||
if (isTruncateSync) {
|
||||
if (
|
||||
initialStatus.finalTableStatus.isEmpty ||
|
||||
initialStatus.finalTableStatus.finalTableGenerationId == null
|
||||
) {
|
||||
if (!initialStatus.finalTableStatus.isSchemaMismatch) {
|
||||
logger.info {
|
||||
"Truncate sync, and final table is empty and has correct schema. Writing to it directly."
|
||||
}
|
||||
return NO_SUFFIX
|
||||
} else {
|
||||
// No point soft resetting an empty table. We'll just do an overwrite later.
|
||||
logger.info {
|
||||
"Truncate sync, and final table is empty, but has the wrong schema. Using a temp final table."
|
||||
}
|
||||
return prepareFinalTableForOverwrite()
|
||||
}
|
||||
} else if (
|
||||
initialStatus.finalTableStatus.finalTableGenerationId >= stream.minimumGenerationId
|
||||
) {
|
||||
if (!initialStatus.finalTableStatus.isSchemaMismatch) {
|
||||
logger.info {
|
||||
"Truncate sync, and final table matches our generation and has correct schema. Writing to it directly."
|
||||
}
|
||||
return NO_SUFFIX
|
||||
} else {
|
||||
logger.info {
|
||||
"Truncate sync, and final table matches our generation, but has the wrong schema. Writing to it directly, but triggering a soft reset first."
|
||||
}
|
||||
finalTableOperations.softResetFinalTable(stream, tableNames, columnNameMapping)
|
||||
return NO_SUFFIX
|
||||
}
|
||||
} else {
|
||||
// The final table is in the wrong generation. Use a temp final table.
|
||||
return prepareFinalTableForOverwrite()
|
||||
}
|
||||
} else {
|
||||
if (initialStatus.finalTableStatus.isSchemaMismatch) {
|
||||
// We're loading data directly into the existing table.
|
||||
// Make sure it has the right schema.
|
||||
// Also, if a raw table migration wants us to do a soft reset, do that
|
||||
// here.
|
||||
logger.info {
|
||||
"Executing soft-reset on final table of stream ${stream.mappedDescriptor}"
|
||||
}
|
||||
finalTableOperations.softResetFinalTable(stream, tableNames, columnNameMapping)
|
||||
}
|
||||
return NO_SUFFIX
|
||||
}
|
||||
}
|
||||
|
||||
private fun prepareFinalTableForOverwrite(): String {
|
||||
if (
|
||||
initialStatus.finalTableStatus?.isEmpty != true ||
|
||||
initialStatus.finalTableStatus.isSchemaMismatch
|
||||
) {
|
||||
// overwrite an existing tmp table if needed.
|
||||
finalTableOperations.createFinalTable(
|
||||
stream,
|
||||
tableNames.finalTableName!!,
|
||||
columnNameMapping,
|
||||
TMP_TABLE_SUFFIX,
|
||||
replace = true
|
||||
)
|
||||
logger.info {
|
||||
"Using temp final table for table ${stream.mappedDescriptor.toPrettyString()}, this will be overwritten at end of sync"
|
||||
}
|
||||
// We want to overwrite an existing table. Write into a tmp table.
|
||||
// We'll overwrite the table at the end of the sync.
|
||||
return TMP_TABLE_SUFFIX
|
||||
}
|
||||
|
||||
logger.info {
|
||||
"Final Table for stream ${stream.mappedDescriptor.toPrettyString()} is empty and matches the expected v2 format, writing to table directly"
|
||||
}
|
||||
return NO_SUFFIX
|
||||
}
|
||||
|
||||
override suspend fun close(hadNonzeroRecords: Boolean, streamFailure: StreamProcessingFailed?) {
|
||||
val streamSuccessful = streamFailure == null
|
||||
// Overwrite the raw table before doing anything else.
|
||||
// This ensures that if T+D fails, we can easily retain the records on the next sync.
|
||||
// It also means we don't need to run T+D using the temp raw table,
|
||||
// which is possible (`typeAndDedupe(streamConfig.id.copy(rawName = streamConfig.id.rawName
|
||||
// + suffix))`
|
||||
// but annoying and confusing.
|
||||
if (isTruncateSync && streamSuccessful && rawTableSuffix.isNotEmpty()) {
|
||||
logger.info {
|
||||
"Overwriting raw table for ${stream.mappedDescriptor.toPrettyString()} because this is a truncate sync, we received a stream success message, and are using a temporary raw table."
|
||||
}
|
||||
rawTableOperations.overwriteRawTable(tableNames.rawTableName!!, rawTableSuffix)
|
||||
} else {
|
||||
logger.info {
|
||||
"Not overwriting raw table for ${stream.mappedDescriptor.toPrettyString()}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful; raw table suffix: \"$rawTableSuffix\""
|
||||
}
|
||||
}
|
||||
|
||||
if (disableTypeDedupe) {
|
||||
logger.info {
|
||||
"Typing and deduping disabled, skipping final table finalization. Raw records can be found at ${tableNames.rawTableName!!.toPrettyString()}"
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Normal syncs should T+D regardless of status, so the user sees progress after every
|
||||
// attempt.
|
||||
// We know this is a normal sync, so initialRawTableStatus is nonnull.
|
||||
if (!isTruncateSync && !hadNonzeroRecords && !initialRawTableStatus.hasUnprocessedRecords) {
|
||||
logger.info {
|
||||
"Skipping typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} because it had no records during this sync and no unprocessed records from a previous sync."
|
||||
}
|
||||
} else if (
|
||||
isTruncateSync &&
|
||||
(!streamSuccessful ||
|
||||
(!hadNonzeroRecords && !initialRawTableStatus.hasUnprocessedRecords))
|
||||
) {
|
||||
// But truncate syncs should only T+D if the sync was successful, since we're T+Ding
|
||||
// into a temp final table anyway.
|
||||
// We only run T+D if the current sync had some records, or a previous attempt wrote
|
||||
// some records to the temp raw table.
|
||||
logger.info {
|
||||
"Skipping typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} running as truncate sync. Stream success: $streamSuccessful; had nonzero records: $hadNonzeroRecords; temp raw table had records: ${initialRawTableStatus.hasUnprocessedRecords}"
|
||||
}
|
||||
} else {
|
||||
// When targeting the temp final table, we want to read all the raw records
|
||||
// because the temp final table is always a full rebuild. Typically, this is equivalent
|
||||
// to filtering on timestamp, but might as well be explicit.
|
||||
val maxProcessedTimestamp =
|
||||
if (finalTmpTableSuffix.isEmpty()) {
|
||||
initialRawTableStatus.maxProcessedTimestamp
|
||||
} else {
|
||||
null
|
||||
}
|
||||
finalTableOperations.typeAndDedupe(
|
||||
stream,
|
||||
tableNames,
|
||||
columnNameMapping,
|
||||
maxProcessedTimestamp = maxProcessedTimestamp,
|
||||
finalTableSuffix = finalTmpTableSuffix
|
||||
)
|
||||
}
|
||||
|
||||
// We want to run this independently of whether we ran T+D.
|
||||
// E.g. it's valid for a sync to emit 0 records (e.g. the source table is legitimately
|
||||
// empty), in which case we want to overwrite the final table with an empty table.
|
||||
if (isTruncateSync && streamSuccessful && finalTmpTableSuffix.isNotBlank()) {
|
||||
logger.info {
|
||||
"Overwriting final table for ${stream.mappedDescriptor.toPrettyString()} because this is a truncate sync, we received a stream success message, and we are using a temp final table.."
|
||||
}
|
||||
finalTableOperations.overwriteFinalTable(
|
||||
stream,
|
||||
tableNames.finalTableName!!,
|
||||
finalTableSuffix = finalTmpTableSuffix
|
||||
)
|
||||
} else {
|
||||
logger.info {
|
||||
"Not overwriting final table for ${stream.mappedDescriptor.toPrettyString()}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful; final table suffix not blank: ${finalTmpTableSuffix.isNotBlank()}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,88 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
||||
*/
|
||||
|
||||
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
|
||||
|
||||
import io.airbyte.cdk.load.command.DestinationStream
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
|
||||
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
|
||||
import io.airbyte.cdk.load.write.DestinationWriter
|
||||
import io.airbyte.cdk.load.write.StreamLoader
|
||||
import io.airbyte.cdk.load.write.StreamStateStore
|
||||
import java.util.concurrent.Executors
|
||||
import kotlinx.coroutines.asCoroutineDispatcher
|
||||
import kotlinx.coroutines.launch
|
||||
import kotlinx.coroutines.runBlocking
|
||||
|
||||
class TypingDedupingWriter(
|
||||
private val names: TableCatalog,
|
||||
private val stateGatherer: DatabaseInitialStatusGatherer<TypingDedupingDatabaseInitialStatus>,
|
||||
private val databaseHandler: DatabaseHandler,
|
||||
private val rawTableOperations: TypingDedupingRawTableOperations,
|
||||
private val finalTableOperations: TypingDedupingFinalTableOperations,
|
||||
private val disableTypeDedupe: Boolean,
|
||||
private val streamStateStore: StreamStateStore<TypingDedupingExecutionConfig>,
|
||||
) : DestinationWriter {
|
||||
private lateinit var initialStatuses:
|
||||
Map<DestinationStream, TypingDedupingDatabaseInitialStatus>
|
||||
|
||||
override suspend fun setup() {
|
||||
Executors.newFixedThreadPool(10).asCoroutineDispatcher().use { dispatcher ->
|
||||
val namespaces =
|
||||
names.values.map { (tableNames, _) -> tableNames.rawTableName!!.namespace } +
|
||||
names.values.map { (tableNames, _) -> tableNames.finalTableName!!.namespace }
|
||||
databaseHandler.createNamespaces(namespaces.toSet())
|
||||
|
||||
val initialInitialStatuses:
|
||||
Map<DestinationStream, TypingDedupingDatabaseInitialStatus> =
|
||||
stateGatherer.gatherInitialStatus(names)
|
||||
|
||||
// TODO migrations - we should probably actually drop all existing migrations as part of
|
||||
// this project, but eventually we'll need some solution here
|
||||
|
||||
// If we have a schema mismatch, then execute a soft reset.
|
||||
val streamsNeedingSoftReset =
|
||||
initialInitialStatuses.filter { (_, status) ->
|
||||
// if the table doesn't exist, then by definition we don't have a schema
|
||||
// mismatch.
|
||||
status.finalTableStatus?.isSchemaMismatch ?: false
|
||||
}
|
||||
runBlocking(dispatcher) {
|
||||
streamsNeedingSoftReset.forEach { (stream, _) ->
|
||||
launch {
|
||||
val (tableNames, columnNameMapping) = names[stream]!!
|
||||
finalTableOperations.softResetFinalTable(
|
||||
stream,
|
||||
tableNames,
|
||||
columnNameMapping
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Soft reset will modify the initial status of a table.
|
||||
// Refetch their statuses.
|
||||
val statusesAfterSoftReset =
|
||||
stateGatherer.gatherInitialStatus(
|
||||
TableCatalog(names.filterKeys { streamsNeedingSoftReset.containsKey(it) })
|
||||
)
|
||||
// second map "wins" when adding two maps together, so we'll retain the newer statuses.
|
||||
initialStatuses = initialInitialStatuses + statusesAfterSoftReset
|
||||
}
|
||||
}
|
||||
|
||||
override fun createStreamLoader(stream: DestinationStream): StreamLoader {
|
||||
val (tableNames, columnNameMapping) = names[stream]!!
|
||||
return TypingDedupingStreamLoader(
|
||||
stream,
|
||||
initialStatuses[stream]!!,
|
||||
tableNames,
|
||||
columnNameMapping,
|
||||
rawTableOperations,
|
||||
finalTableOperations,
|
||||
disableTypeDedupe = disableTypeDedupe,
|
||||
streamStateStore,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -4,8 +4,8 @@
|
||||
|
||||
package io.airbyte.cdk.load.toolkits.load.db.orchestration
|
||||
|
||||
import io.airbyte.cdk.load.orchestration.db.DefaultTempTableNameGenerator
|
||||
import io.airbyte.cdk.load.table.TableName
|
||||
import io.airbyte.cdk.load.schema.model.TableName
|
||||
import io.airbyte.cdk.load.table.DefaultTempTableNameGenerator
|
||||
import org.junit.jupiter.api.Assertions.*
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
|
||||
@@ -1,308 +0,0 @@
/*
 * Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 */

package io.airbyte.cdk.load.toolkits.load.db.orchestration

import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.RawTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalogFactory
import io.airbyte.cdk.load.table.TableName
import org.junit.jupiter.api.Assertions.assertAll
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test

class TableCatalogFactoryTest {
    @Test
    fun testTableNameCollision() {
        // Create the same streams as in the original test - "foobarfoo" and "foofoo"
        val stream1 = createTestStream("foobarfoo", "a")
        val stream2 = createTestStream("foofoo", "a")

        // Use SAM syntax with conditional logic in the lambda
        val rawTableNameGenerator = RawTableNameGenerator { descriptor ->
            TableName(
                "airbyte_internal",
                """${descriptor.namespace}_${descriptor.name.replace("bar", "")}""",
            )
        }

        val finalTableNameGenerator = FinalTableNameGenerator { descriptor ->
            TableName(
                descriptor.namespace!!,
                descriptor.name.replace("bar", ""),
            )
        }

        val columnNameGenerator = ColumnNameGenerator { input ->
            ColumnNameGenerator.ColumnName(input, input)
        }

        val catalog = DestinationCatalog(listOf(stream1, stream2))

        val tableCatalog =
            TableCatalogFactory()
                .getTableCatalog(
                    catalog,
                    rawTableNameGenerator,
                    finalTableNameGenerator,
                    columnNameGenerator
                )

        // Get the final table names for both streams
        val stream1TableInfo = tableCatalog[stream1]!!
        val stream2TableInfo = tableCatalog[stream2]!!

        assertAll(
            { assertEquals("foofoo", stream1TableInfo.tableNames.finalTableName!!.name) },
            { assertEquals("a", stream1TableInfo.tableNames.finalTableName!!.namespace) },
            { assertEquals("foofoo_3fd", stream2TableInfo.tableNames.finalTableName!!.name) },
            {
                assertEquals(
                    "a",
                    stream2TableInfo.tableNames.finalTableName!!.namespace,
                )
            }
        )

        // Now check raw table names with exact expected suffix
        assertAll(
            { assertEquals("a_foofoo", stream1TableInfo.tableNames.rawTableName!!.name) },
            {
                assertEquals(
                    DEFAULT_AIRBYTE_INTERNAL_NAMESPACE,
                    stream1TableInfo.tableNames.rawTableName!!.namespace
                )
            },
            { assertEquals("a_foofoo_3fd", stream2TableInfo.tableNames.rawTableName!!.name) },
            {
                assertEquals(
                    DEFAULT_AIRBYTE_INTERNAL_NAMESPACE,
                    stream2TableInfo.tableNames.rawTableName!!.namespace
                )
            }
        )
    }

    /**
     * Test two streams which don't collide in their final tables, and with no raw tables.
     *
     * We should leave both streams unchanged.
     */
    @Test
    fun testTableNameNoCollisionWithNoRawTableGenerator() {
        val stream1 = createTestStream("foo", "a")
        val stream2 = createTestStream("bar", "a")

        val finalTableNameGenerator = FinalTableNameGenerator { descriptor ->
            TableName(descriptor.namespace!!, descriptor.name)
        }

        val columnNameGenerator = ColumnNameGenerator { input ->
            ColumnNameGenerator.ColumnName(input, input)
        }

        val catalog = DestinationCatalog(listOf(stream1, stream2))

        val tableCatalog =
            TableCatalogFactory()
                .getTableCatalog(
                    catalog,
                    rawTableNameGenerator = null,
                    finalTableNameGenerator,
                    columnNameGenerator
                )

        // Get the final table names for both streams
        val stream1TableInfo = tableCatalog[stream1]!!
        val stream2TableInfo = tableCatalog[stream2]!!

        assertAll(
            { assertEquals("foo", stream1TableInfo.tableNames.finalTableName!!.name) },
            { assertEquals("a", stream1TableInfo.tableNames.finalTableName!!.namespace) },
            { assertEquals("bar", stream2TableInfo.tableNames.finalTableName!!.name) },
            {
                assertEquals(
                    "a",
                    stream2TableInfo.tableNames.finalTableName!!.namespace,
                )
            }
        )

        // Now check raw table names are null
        assertAll(
            { assertNull(stream1TableInfo.tableNames.rawTableName) },
            { assertNull(stream2TableInfo.tableNames.rawTableName) },
        )
    }

    @Test
    fun testTruncatingColumnNameCollision() {
        val schema =
            ObjectType(
                linkedMapOf(
                    "aVeryLongColumnName" to FieldType(StringType, true),
                    "aVeryLongColumnNameWithMoreTextAfterward" to FieldType(StringType, true),
                )
            )
        val stream = createTestStream("stream", "namespace", schema)
        val catalog = DestinationCatalog(listOf(stream))

        val rawTableNameGenerator = RawTableNameGenerator { _ ->
            TableName("raw_dataset", "raw_stream")
        }

        val finalTableNameGenerator = FinalTableNameGenerator { _ ->
            TableName("final_dataset", "final_stream")
        }

        val columnNameGenerator = ColumnNameGenerator { input ->
            val truncated = input.substring(0, 10.coerceAtMost(input.length))
            ColumnNameGenerator.ColumnName(truncated, truncated)
        }

        val tableCatalog =
            TableCatalogFactory()
                .getTableCatalog(
                    catalog,
                    rawTableNameGenerator,
                    finalTableNameGenerator,
                    columnNameGenerator
                )

        val columnMapping = tableCatalog[stream]!!.columnNameMapping
        val mappedNames =
            listOf(
                columnMapping["aVeryLongColumnName"]!!,
                columnMapping["aVeryLongColumnNameWithMoreTextAfterward"]!!
            )

        assertEquals(2, mappedNames.size)
        assertEquals("aVeryLongC", mappedNames[0])
        assertEquals("aV36rd", mappedNames[1])
    }

    @Test
    fun testColumnNameCollision() {
        // Create a schema with columns that will have name collision after processing
        val schema =
            ObjectType(
                linkedMapOf(
                    "foobarfoo" to FieldType(StringType, true),
                    "foofoo" to FieldType(StringType, true),
                )
            )

        val stream = createTestStream("stream", "namespace", schema)
        val catalog = DestinationCatalog(listOf(stream))

        val rawTableNameGenerator = RawTableNameGenerator { _ ->
            TableName("raw_dataset", "raw_stream")
        }

        val finalTableNameGenerator = FinalTableNameGenerator { _ ->
            TableName("final_dataset", "final_stream")
        }

        // Simulate name collision by removing "bar"
        val columnNameGenerator = ColumnNameGenerator { input ->
            val processedName = input.replace("bar", "")
            ColumnNameGenerator.ColumnName(processedName, processedName)
        }

        val tableCatalog =
            TableCatalogFactory()
                .getTableCatalog(
                    catalog,
                    rawTableNameGenerator,
                    finalTableNameGenerator,
                    columnNameGenerator
                )

        val columnMapping = tableCatalog[stream]!!.columnNameMapping
        val mappedColumns = listOf(columnMapping["foobarfoo"]!!, columnMapping["foofoo"]!!)

        // Verify column name collision was properly resolved
        // One column should be "foofoo" and the other should be "foofoo_1"
        assertAll(
            { assertEquals(2, mappedColumns.size) },
            { assertEquals("foofoo", mappedColumns[0]) },
            { assertEquals("foofoo_1", mappedColumns[1]) }
        )
    }

    @Test
    fun testColumnNameCollisionRelyingOnCanonicalName() {
        val schema =
            ObjectType(
                linkedMapOf(
                    "FOO" to FieldType(StringType, true),
                    "foo" to FieldType(StringType, true),
                )
            )
        val stream = createTestStream("stream", "namespace", schema)
        val catalog = DestinationCatalog(listOf(stream))
        val rawTableNameGenerator = RawTableNameGenerator { _ ->
            TableName("raw_dataset", "raw_stream")
        }
        val finalTableNameGenerator = FinalTableNameGenerator { _ ->
            TableName("final_dataset", "final_stream")
        }

        // Simulate name collision by downcasing, while retaining the original name
        // as the display name
        val columnNameGenerator = ColumnNameGenerator { input ->
            ColumnNameGenerator.ColumnName(
                displayName = input,
                canonicalName = input.lowercase(),
            )
        }

        val tableCatalog =
            TableCatalogFactory()
                .getTableCatalog(
                    catalog,
                    rawTableNameGenerator,
                    finalTableNameGenerator,
                    columnNameGenerator,
                )

        val columnMapping = tableCatalog[stream]!!.columnNameMapping

        assertEquals(
            mapOf(
                "FOO" to "FOO",
                "foo" to "foo_1",
            ),
            columnMapping,
        )
    }

    private fun createTestStream(
        name: String,
        namespace: String,
        schema: AirbyteType = ObjectType(linkedMapOf())
    ): DestinationStream {
        return DestinationStream(
            unmappedNamespace = namespace,
            unmappedName = name,
            importType = Append,
            schema = schema,
            generationId = 1L,
            minimumGenerationId = 0L,
            syncId = 0L,
            namespaceMapper = NamespaceMapper()
        )
    }
}
@@ -6,7 +6,7 @@ package io.airbyte.cdk.load.toolkits.load.db.orchestration

import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.table.ColumnNameGenerator
import io.airbyte.cdk.load.test.util.ExpectedRecordMapper
import io.airbyte.cdk.load.test.util.OutputRecord

@@ -15,6 +15,10 @@ import io.airbyte.cdk.load.message.CheckpointMessage
import io.airbyte.cdk.load.message.InputRecord
import io.airbyte.cdk.load.message.InputStreamCheckpoint
import io.airbyte.cdk.load.message.StreamCheckpoint
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.test.mock.MockDestinationDataDumper
import io.airbyte.cdk.load.test.util.IntegrationTest
import io.airbyte.cdk.load.test.util.NoopDestinationCleaner
@@ -55,6 +59,17 @@ open class AbstractDlqWriteTest(
                minimumGenerationId = 0,
                syncId = 42,
                namespaceMapper = NamespaceMapper(),
                tableSchema =
                    StreamTableSchema(
                        columnSchema =
                            ColumnSchema(
                                inputSchema = mapOf(),
                                inputToFinalColumnNames = mapOf(),
                                finalSchema = mapOf(),
                            ),
                        importType = Append,
                        tableNames = TableNames(finalTableName = TableName("namespace", "test")),
                    ),
            )
        val messages =
            runSync(
Some files were not shown because too many files have changed in this diff.