
Merge branch 'master' into agarctfi/source-google-drive/fix-excel-date-out-of-range

This commit is contained in:
Alfredo Garcia
2025-12-11 09:57:29 -06:00
committed by GitHub
1534 changed files with 57497 additions and 10925 deletions

View File

@@ -18,7 +18,10 @@ As needed or by request, Airbyte Maintainers can execute the following slash com
- `/bump-version` - Bumps connector versions.
- `/run-connector-tests` - Runs connector tests.
- `/run-cat-tests` - Runs CAT tests.
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes a pre-release connector build (tagged as `{version}-dev.{git-sha}`) for the modified connector in the PR. Only one connector can be published at a time; pass `connector=<name>` if more than one connector is modified.
If you have any questions, feel free to ask in the PR comments or join our [Slack community](https://airbytehq.slack.com/).

View File

@@ -21,8 +21,18 @@ Airbyte Maintainers (that's you!) can execute the following slash commands on yo
- `/bump-version` - Bumps connector versions.
- You can specify a custom changelog by passing `changelog`. Example: `/bump-version changelog="My cool update"`
- Leaving the changelog arg blank will auto-populate the changelog from the PR title.
- `/bump-progressive-rollout-version` - Bumps connector version with an RC suffix for progressive rollouts.
- Creates a release candidate version (e.g., `2.16.10-rc.1`) with `enableProgressiveRollout: true`
- Example: `/bump-progressive-rollout-version changelog="Add new feature for progressive rollout"`
- `/run-cat-tests` - Runs legacy CAT tests (Connector Acceptance Tests).
- `/run-live-tests` - Runs live tests for the modified connector(s).
- `/run-regression-tests` - Runs regression tests for the modified connector(s).
- `/build-connector-images` - Builds and publishes a pre-release docker image for the modified connector(s).
- `/publish-connectors-prerelease` - Publishes a pre-release connector build (tagged as `{version}-dev.{git-sha}`) for the modified connector in the PR. Only one connector can be published at a time; pass `connector=<name>` if more than one connector is modified.
- Connector release lifecycle (AI-powered):
- `/ai-prove-fix` - Runs prerelease readiness checks, including testing against customer connections.
- `/ai-canary-prerelease` - Rolls out prerelease to 5-10 connections for canary testing.
- `/ai-release-watch` - Monitors rollout post-release and tracks sync success rates.
- JVM connectors:
- `/update-connector-cdk-version connector=<CONNECTOR_NAME>` - Updates the specified connector to the latest CDK version.
Example: `/update-connector-cdk-version connector=destination-bigquery`

View File

@@ -0,0 +1,34 @@
name: Regression Report Evaluation
description: Evaluate Airbyte connector regression test reports and return a JSON verdict with reasoning
model: llama3.2:3b
modelParameters:
temperature: 0.3
messages:
- role: system
content: |
You are an expert at evaluating connector regression test results.
Your task is to analyze the test report and determine if the regression tests should PASS or FAIL.
Consider the following criteria:
1. All test cases should pass (no failed tests)
2. Record count differences between control and target versions should be minimal or explainable
3. Message count differences should not indicate data loss or corruption
4. Stream coverage should be reasonable
5. Any warnings or errors in test outputs should be evaluated for severity
Provide your evaluation in the following JSON format:
{
"pass": true/false,
"summary": "A concise 2-3 sentence summary of the evaluation",
"reasoning": "Detailed reasoning for your pass/fail decision, including specific issues found",
"severity": "critical/major/minor/none",
"recommendations": "Any recommendations for addressing issues"
}
Be strict but fair in your evaluation. Minor differences are acceptable, but data loss,
corruption, or test failures should result in a FAIL.
- role: user
content: |
Report:
{{report_text}}
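For illustration, here is a minimal sketch of a verdict matching the JSON template requested above, and how a downstream consumer might check the pass/fail flag with `jq`. The field names come from the template; the sample values and file path are hypothetical.

```bash
# Write a sample verdict in the shape requested by the prompt (values are illustrative).
cat > /tmp/sample_verdict.json <<'EOF'
{
  "pass": true,
  "summary": "All streams matched between control and target; record counts are identical.",
  "reasoning": "No failed test cases and no message count differences suggesting data loss or corruption.",
  "severity": "none",
  "recommendations": "None."
}
EOF

# Extract the pass/fail flag the way a CI step might.
PASS=$(jq -r '.pass' /tmp/sample_verdict.json)
if [ "$PASS" = "true" ]; then echo "verdict: PASS"; else echo "verdict: FAIL"; fi
```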

View File

@@ -0,0 +1,71 @@
name: AI Canary Prerelease Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Canary Prerelease for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-canary-prerelease:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Canary Prerelease Started**
>
> Rolling out to 5-10 connections, watching results, and reporting findings.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Canary Prerelease
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!canary_prerelease"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "🐤 **AI Canary Prerelease session starting...** Rolling out to 5-10 connections, watching results, and reporting findings. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/canary_prerelease.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,71 @@
name: AI Prove Fix Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Prove Fix for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-prove-fix:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Prove Fix Started**
>
> Running readiness checks and testing against customer connections.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Prove Fix
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!prove_fix"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "🔍 **AI Prove Fix session starting...** Running readiness checks and testing against customer connections. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/prove_fix.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,71 @@
name: AI Release Watch Command
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number (if triggered from a PR)"
type: number
required: false
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
repo:
description: "Repo (passed by slash command dispatcher)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Git ref (passed by slash command dispatcher)"
required: false
run-name: "AI Release Watch for PR #${{ github.event.inputs.pr }}"
permissions:
contents: read
issues: write
pull-requests: read
jobs:
ai-release-watch:
runs-on: ubuntu-latest
steps:
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
- name: Checkout code
uses: actions/checkout@v4
- name: Authenticate as GitHub App
uses: actions/create-github-app-token@v2
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte,oncall"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post start comment
if: inputs.comment-id != ''
uses: peter-evans/create-or-update-comment@v4
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
body: |
> **AI Release Watch Started**
>
> Monitoring rollout and tracking sync success rates.
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
- name: Run AI Release Watch
uses: aaronsteers/devin-action@main
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ inputs.pr }}
playbook-macro: "!release_watch"
devin-token: ${{ secrets.DEVIN_AI_API_KEY }}
github-token: ${{ steps.get-app-token.outputs.token }}
start-message: "👁️ **AI Release Watch session starting...** Monitoring rollout and tracking sync success rates. [View playbook](https://github.com/airbytehq/oncall/blob/main/prompts/playbooks/release_watch.md)"
tags: |
ai-oncall

View File

@@ -0,0 +1,178 @@
name: Bump connector version for progressive rollout
on:
workflow_dispatch:
inputs:
pr:
description: "Pull request number. This PR will be referenced in the changelog line."
type: number
required: false
comment-id:
description: "Optional. The comment-id of the slash command. Used to update the comment with the status."
required: false
type:
description: "The type of bump to perform. One of 'major', 'minor', or 'patch'."
required: false
default: "patch"
changelog:
description: "Optional. The comment to add to the changelog. If not provided, the PR title will be used."
required: false
default: ""
# These must be declared, but they are unused and ignored.
# TODO: Infer 'repo' and 'gitref' from PR number on other workflows, so we can remove these.
repo:
description: "Repo (Ignored)"
required: false
default: "airbytehq/airbyte"
gitref:
description: "Ref (Ignored)"
required: false
run-name: "Bump connector version for progressive rollout in PR: #${{ github.event.inputs.pr }}"
concurrency:
group: ${{ github.workflow }}-${{ github.event.inputs.pr }}
# Cancel any previous runs on the same branch if they are still in progress
cancel-in-progress: true
jobs:
bump-progressive-rollout-version:
name: "Bump version of connectors for progressive rollout in this PR"
runs-on: ubuntu-24.04
steps:
- name: Get job variables
id: job-vars
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
shell: bash
run: |
PR_JSON=$(gh api repos/${{ github.repository }}/pulls/${{ github.event.inputs.pr }})
echo "repo=$(echo "$PR_JSON" | jq -r .head.repo.full_name)" >> $GITHUB_OUTPUT
echo "branch=$(echo "$PR_JSON" | jq -r .head.ref)" >> $GITHUB_OUTPUT
echo "pr_title=$(echo "$PR_JSON" | jq -r .title)" >> $GITHUB_OUTPUT
echo "run-url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
# permissions to add commits to our main repo as well as forks. This will only work on
# forks if the user installs the app into their fork. Until we document this as a clear
# path, we will have to keep using the PAT.
- name: Checkout Airbyte
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
repository: ${{ steps.job-vars.outputs.repo }}
ref: ${{ steps.job-vars.outputs.branch }}
fetch-depth: 1
# Important that token is a PAT so that CI checks are triggered again.
# Without this we would be forever waiting on required checks to pass.
token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
- name: Append comment with job run link
# If comment-id is not provided, this will create a new
# comment with the job run link.
id: first-comment-action
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
comment-id: ${{ github.event.inputs.comment-id }}
issue-number: ${{ github.event.inputs.pr }}
body: |
> **Progressive Rollout Version Bump Started**
>
> This will bump the connector version with an RC suffix and enable progressive rollout.
> [Check job output.][1]
[1]: ${{ steps.job-vars.outputs.run-url }}
- name: Log changelog source
run: |
if [ -n "${{ github.event.inputs.changelog }}" ]; then
echo "Using user-provided changelog: ${{ github.event.inputs.changelog }}"
else
echo "Using PR title as changelog: ${{ steps.job-vars.outputs.pr_title }}"
fi
- name: Run airbyte-ci connectors --modified bump-version with --rc flag
uses: ./.github/actions/run-airbyte-ci
continue-on-error: true
with:
context: "manual"
gcs_credentials: ${{ secrets.METADATA_SERVICE_PROD_GCS_CREDENTIALS }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
github_token: ${{ secrets.GH_PAT_APPROVINGTON_OCTAVIA }}
git_repo_url: https://github.com/${{ steps.job-vars.outputs.repo }}.git
subcommand: |
connectors --modified bump-version \
${{ github.event.inputs.type }} \
"${{ github.event.inputs.changelog != '' && github.event.inputs.changelog || steps.job-vars.outputs.pr_title }}" \
--pr-number ${{ github.event.inputs.pr }} \
--rc
# This is helpful in the case that we change a previously committed generated file to be ignored by git.
- name: Remove any files that have been gitignored
run: git ls-files -i -c --exclude-from=.gitignore | xargs -r git rm --cached
# Check for changes in git
- name: Check for changes
id: git-diff
run: |
git diff --quiet && echo "No changes to commit" || echo "changes=true" >> $GITHUB_OUTPUT
shell: bash
# Commit changes (if any)
- name: Commit changes
id: commit-step
if: steps.git-diff.outputs.changes == 'true'
run: |
git config --global user.name "Octavia Squidington III"
git config --global user.email "octavia-squidington-iii@users.noreply.github.com"
git add .
git commit -m "chore: bump-version for progressive rollout"
echo "sha=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
- name: Push changes to '(${{ steps.job-vars.outputs.repo }})'
if: steps.git-diff.outputs.changes == 'true'
run: |
git remote add contributor https://github.com/${{ steps.job-vars.outputs.repo }}.git
git push contributor HEAD:'${{ steps.job-vars.outputs.branch }}'
- name: Append success comment
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: steps.git-diff.outputs.changes == 'true'
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: hooray
body: |
> **Progressive Rollout Version Bump: SUCCESS**
>
> The connector version has been bumped with an RC suffix (e.g., `X.Y.Z-rc.1`).
> Changes applied successfully. (${{ steps.commit-step.outputs.sha }})
>
> **Next steps:**
> 1. Merge this PR to publish the RC version
> 2. Monitor the progressive rollout in production
> 3. When ready to promote, use the `finalize_rollout` workflow with `action=promote`
> 4. If issues arise, use `action=rollback` instead
- name: Append success comment (no-op)
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: steps.git-diff.outputs.changes != 'true'
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: "-1"
body: |
> Job completed successfully (no changes detected).
>
> This might happen if:
> - The connector already has an RC version
> - No modified connectors were detected in this PR
- name: Append failure comment
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
if: failure()
with:
comment-id: ${{ steps.first-comment-action.outputs.comment-id }}
reactions: confused
body: |
> Job failed. Check the [workflow logs](${{ steps.job-vars.outputs.run-url }}) for details.

View File

@@ -0,0 +1,173 @@
name: Kotlin Bulk CDK Docs
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- "airbyte-cdk/bulk/**"
push:
branches:
- master
paths:
- "airbyte-cdk/bulk/**"
workflow_dispatch:
# Concurrency group ensures only one deployment runs at a time
concurrency:
group: kotlin-bulk-cdk-docs-${{ github.ref }}
cancel-in-progress: false
jobs:
detect-changes:
name: Detect Kotlin Bulk CDK Changes
runs-on: ubuntu-24.04
steps:
- name: Force 'changed=true' [Manual Trigger]
id: set-changed
if: github.event_name == 'workflow_dispatch'
run: echo "changed=true" >> "$GITHUB_OUTPUT"
- name: Checkout Repository [Push Trigger]
if: github.event_name == 'push'
uses: actions/checkout@v4
with:
fetch-depth: 2
- name: Detect Changes
# PR triggers use the GitHub API (no prior checkout required).
# Push triggers will require the checked-out code.
id: detect-changes
if: github.event_name != 'workflow_dispatch'
uses: dorny/paths-filter@v3.0.2
with:
filters: |
bulk-cdk:
- 'airbyte-cdk/bulk/**'
outputs:
changed: ${{ steps.set-changed.outputs.changed || steps.detect-changes.outputs.bulk-cdk }}
build-docs:
name: Build Kotlin Bulk CDK Documentation
runs-on: ubuntu-24.04
needs: detect-changes
# Build docs if changes detected OR if manually triggered via workflow_dispatch
if: needs.detect-changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch'
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 1
repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
ref: ${{ github.head_ref || github.ref }}
- name: Set up Java
uses: actions/setup-java@v4
with:
distribution: "zulu"
java-version: "21"
- name: Setup Gradle
uses: gradle/gradle-build-action@v3
with:
gradle-version: wrapper
- name: Generate Dokka Documentation
run: |
echo "📚 Generating Dokka documentation for Kotlin Bulk CDK..."
./gradlew :airbyte-cdk:bulk:dokkaHtmlMultiModule --no-daemon
echo "✅ Documentation generated successfully"
ls -la airbyte-cdk/bulk/build/dokka/htmlMultiModule/
- name: Upload Documentation Artifact
uses: actions/upload-artifact@v4
with:
name: kotlin-bulk-cdk-docs-${{ github.sha }}
path: airbyte-cdk/bulk/build/dokka/htmlMultiModule/
retention-days: 30
vercel-deploy:
name: Deploy Docs to Vercel ${{ github.ref == 'refs/heads/master' && '(Production)' || '(Preview)' }}
needs: [detect-changes, build-docs]
# Deploy for: non-fork PRs, master branch pushes, OR manual workflow_dispatch
# Always require Vercel project to be configured
if: >
(needs.detect-changes.outputs.changed == 'true' || github.event_name == 'workflow_dispatch')
&& (
github.event_name == 'push'
|| github.event.pull_request.head.repo.full_name == github.repository
|| github.event_name == 'workflow_dispatch'
)
&& vars.VERCEL_KOTLIN_CDK_PROJECT_ID != ''
runs-on: ubuntu-24.04
environment:
name: ${{ github.ref == 'refs/heads/master' && 'kotlin-cdk-docs' || 'kotlin-cdk-docs-preview' }}
url: ${{ steps.deploy-vercel.outputs.preview-url }}
env:
VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
VERCEL_PROJECT_ID: ${{ vars.VERCEL_KOTLIN_CDK_PROJECT_ID }}
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Download Documentation Artifact
uses: actions/download-artifact@v4
with:
name: kotlin-bulk-cdk-docs-${{ github.sha }}
path: docs-output/airbyte-cdk/bulk
- name: Debug - Show artifact structure
run: |
echo "📂 Artifact structure:"
ls -lah docs-output/airbyte-cdk/bulk
echo ""
echo "🔍 Looking for index.html:"
find docs-output -type f -name "index.html" -print
echo ""
echo "✅ Verifying deployment path..."
test -f docs-output/airbyte-cdk/bulk/index.html && echo "✅ index.html found at expected path" || echo "❌ index.html NOT found at expected path"
- name: Debug - Deployment Mode
run: |
echo "Event: ${{ github.event_name }}"
echo "Ref: ${{ github.ref }}"
echo "Is Production: ${{ github.ref == 'refs/heads/master' }}"
echo "Vercel Args: ${{ github.ref == 'refs/heads/master' && '--prod' || '(none - preview)' }}"
- name: Deploy to Vercel
id: deploy-vercel
uses: amondnet/vercel-action@v41.1.4
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
vercel-token: ${{ secrets.VERCEL_TOKEN }}
vercel-org-id: ${{ env.VERCEL_ORG_ID }}
vercel-project-id: ${{ env.VERCEL_PROJECT_ID }}
working-directory: docs-output
vercel-args: ${{ github.ref == 'refs/heads/master' && '--prod' || '' }}
- name: Authenticate as GitHub App
if: github.event_name == 'pull_request'
uses: actions/create-github-app-token@v2.0.6
id: get-app-token
with:
owner: "airbytehq"
repositories: "airbyte"
app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
- name: Post Custom Check with Preview URL
if: github.event_name == 'pull_request'
uses: LouisBrunner/checks-action@v2.0.0
with:
name: "Kotlin Bulk CDK Docs Preview"
status: completed
conclusion: success
details_url: ${{ steps.deploy-vercel.outputs.preview-url }}
token: ${{ steps.get-app-token.outputs.token }}
output: |
{"summary":"Documentation preview deployed successfully","text":"View the Kotlin Bulk CDK documentation at the preview URL"}

View File

@@ -0,0 +1,28 @@
name: Label Community PRs
# This workflow automatically adds the "community" label to PRs from forks.
# This enables automatic tracking on the Community PRs project board.
on:
pull_request_target:
types:
- opened
- reopened
jobs:
label-community-pr:
name: Add "Community" Label to PR
# Only run for PRs from forks
if: github.event.pull_request.head.repo.fork == true
runs-on: ubuntu-24.04
permissions:
issues: write
pull-requests: write
steps:
- name: Add community label
# This action uses GitHub's addLabels API, which is idempotent.
# If the label already exists, the API call succeeds without error.
uses: actions-ecosystem/action-add-labels@bd52874380e3909a1ac983768df6976535ece7f8 # v1.1.3
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
labels: community

View File

@@ -0,0 +1,209 @@
name: Publish Connectors Pre-release
# This workflow publishes a pre-release connector build from a PR branch.
# It can be triggered via the /publish-connectors-prerelease slash command from PR comments,
# or via the MCP tool `publish_connector_to_airbyte_registry`.
#
# Pre-release versions are tagged with the format: {version}-dev.{10-char-git-sha}
# These versions are NOT eligible for semver auto-advancement but ARE available
# for version pinning via the scoped_configuration API.
#
# Usage:
# /publish-connectors-prerelease # Auto-detects single modified connector
# /publish-connectors-prerelease connector=source-github # Explicit connector name
#
# If no connector is specified, the workflow auto-detects modified connectors.
# It will fail if 0 or 2+ connectors are modified (only single-connector publishing is supported).
on:
workflow_dispatch:
inputs:
# Global static-arg inputs for slash commands
repo:
description: "The repository name"
required: false
default: "airbytehq/airbyte"
type: string
gitref:
description: "The git reference (branch or tag)"
required: false
type: string
comment-id:
description: "The ID of the comment triggering the workflow"
required: false
type: number
pr:
description: "The pull request number, if applicable"
required: false
type: number
connector:
description: "Single connector name to publish (e.g., destination-pinecone). If not provided, auto-detects from PR changes (fails if 0 or 2+ connectors modified)."
required: false
type: string
concurrency:
group: ${{ github.workflow }}-${{ github.event.inputs.pr || github.run_id }}
cancel-in-progress: false
jobs:
init:
name: Initialize Pre-release Publish
runs-on: ubuntu-24.04
outputs:
run-url: ${{ steps.job-vars.outputs.run-url }}
pr-number: ${{ steps.job-vars.outputs.pr-number }}
comment-id: ${{ steps.append-start-comment.outputs.comment-id }}
short-sha: ${{ steps.get-sha.outputs.short-sha }}
connector-name: ${{ steps.resolve-connector.outputs.connector-name }}
connector-version: ${{ steps.connector-version.outputs.connector-version }}
steps:
- name: Checkout to get commit SHA
uses: actions/checkout@v4
with:
repository: ${{ inputs.repo || github.repository }}
ref: ${{ inputs.gitref || '' }}
fetch-depth: 0
- name: Get short SHA
id: get-sha
run: |
SHORT_SHA=$(git rev-parse --short=10 HEAD)
echo "short-sha=$SHORT_SHA" >> $GITHUB_OUTPUT
- name: Get job variables
id: job-vars
run: |
echo "run-url=https://github.com/${{ github.repository }}/actions/runs/$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
echo "pr-number=${{ inputs.pr }}" >> $GITHUB_OUTPUT
- name: Resolve connector name
id: resolve-connector
run: |
set -euo pipefail
if [[ -n "${{ inputs.connector }}" ]]; then
echo "Connector explicitly provided: ${{ inputs.connector }}"
echo "connector-name=${{ inputs.connector }}" >> "$GITHUB_OUTPUT"
exit 0
fi
echo "No connector provided, detecting modified connectors..."
MODIFIED_JSON=$(./poe-tasks/get-modified-connectors.sh --json)
echo "Modified connectors JSON: $MODIFIED_JSON"
CONNECTORS=$(echo "$MODIFIED_JSON" | jq -r '.connector | map(select(. != "")) | .[]')
CONNECTOR_COUNT=$(echo "$MODIFIED_JSON" | jq -r '.connector | map(select(. != "")) | length')
echo "Found $CONNECTOR_COUNT modified connector(s)"
if [[ "$CONNECTOR_COUNT" -eq 0 ]]; then
echo "::error::No modified connectors found in this PR. Please specify a connector name explicitly."
exit 1
elif [[ "$CONNECTOR_COUNT" -gt 1 ]]; then
echo "::error::Multiple modified connectors found: $CONNECTORS. This workflow only supports publishing one connector at a time. Please specify a connector name explicitly."
exit 1
fi
CONNECTOR_NAME=$(echo "$CONNECTORS" | head -n1)
echo "Auto-detected single modified connector: $CONNECTOR_NAME"
echo "connector-name=$CONNECTOR_NAME" >> "$GITHUB_OUTPUT"
- name: Determine connector version
id: connector-version
run: |
set -euo pipefail
CONNECTOR_NAME="${{ steps.resolve-connector.outputs.connector-name }}"
CONNECTOR_DIR="airbyte-integrations/connectors/$CONNECTOR_NAME"
VERSION=""
if [[ -f "$CONNECTOR_DIR/manifest.yaml" ]]; then
VERSION=$(grep -E '^\s*version:' "$CONNECTOR_DIR/manifest.yaml" | head -n1 | awk '{print $2}' | tr -d '"')
fi
if [[ -z "$VERSION" ]] && [[ -f "$CONNECTOR_DIR/metadata.yaml" ]]; then
VERSION=$(grep -E '^\s*dockerImageTag:' "$CONNECTOR_DIR/metadata.yaml" | head -n1 | awk '{print $2}' | tr -d '"')
fi
echo "connector-version=$VERSION" >> "$GITHUB_OUTPUT"
- name: Append start comment
id: append-start-comment
if: inputs.comment-id != '' || inputs.pr != ''
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
comment-id: ${{ inputs.comment-id }}
issue-number: ${{ steps.job-vars.outputs.pr-number }}
reactions: "+1"
body: |
> **Pre-release Connector Publish Started**
>
> Publishing pre-release build for connector `${{ steps.resolve-connector.outputs.connector-name }}`.
> Branch: `${{ inputs.gitref }}`
>
> Pre-release versions will be tagged as `{version}-dev.${{ steps.get-sha.outputs.short-sha }}`
> and are available for version pinning via the scoped_configuration API.
>
> [View workflow run](${{ steps.job-vars.outputs.run-url }})
publish:
name: Publish Pre-release
needs: [init]
uses: ./.github/workflows/publish_connectors.yml
with:
connectors: ${{ format('--name={0}', needs.init.outputs.connector-name) }}
release-type: pre-release
secrets: inherit
post-completion:
name: Post Completion Status
needs: [init, publish]
runs-on: ubuntu-24.04
if: always() && (inputs.comment-id != '' || inputs.pr != '')
steps:
- name: Determine publish status
id: status
run: |
if [[ "${{ needs.publish.result }}" == "success" ]]; then
echo "status_emoji=:white_check_mark:" >> $GITHUB_OUTPUT
echo "status_text=SUCCESS" >> $GITHUB_OUTPUT
elif [[ "${{ needs.publish.result }}" == "failure" ]]; then
echo "status_emoji=:x:" >> $GITHUB_OUTPUT
echo "status_text=FAILED" >> $GITHUB_OUTPUT
elif [[ "${{ needs.publish.result }}" == "cancelled" ]]; then
echo "status_emoji=:warning:" >> $GITHUB_OUTPUT
echo "status_text=CANCELLED" >> $GITHUB_OUTPUT
else
echo "status_emoji=:grey_question:" >> $GITHUB_OUTPUT
echo "status_text=UNKNOWN" >> $GITHUB_OUTPUT
fi
- name: Prepare message variables
id: message-vars
run: |
CONNECTOR_NAME="${{ needs.init.outputs.connector-name }}"
# Use the actual docker-image-tag from the publish workflow output
DOCKER_TAG="${{ needs.publish.outputs.docker-image-tag }}"
if [[ -z "$DOCKER_TAG" ]]; then
echo "::error::docker-image-tag output is missing from publish workflow. This is unexpected."
exit 1
fi
echo "connector_name=$CONNECTOR_NAME" >> $GITHUB_OUTPUT
echo "docker_image=airbyte/$CONNECTOR_NAME" >> $GITHUB_OUTPUT
echo "docker_tag=$DOCKER_TAG" >> $GITHUB_OUTPUT
echo "dockerhub_url=https://hub.docker.com/layers/airbyte/$CONNECTOR_NAME/$DOCKER_TAG" >> $GITHUB_OUTPUT
echo "oss_registry_url=https://connectors.airbyte.com/files/metadata/airbyte/$CONNECTOR_NAME/$DOCKER_TAG/oss.json" >> $GITHUB_OUTPUT
echo "cloud_registry_url=https://connectors.airbyte.com/files/metadata/airbyte/$CONNECTOR_NAME/$DOCKER_TAG/cloud.json" >> $GITHUB_OUTPUT
- name: Append completion comment
uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0
with:
comment-id: ${{ needs.init.outputs.comment-id }}
issue-number: ${{ needs.init.outputs.pr-number }}
body: |
> **Pre-release Publish: ${{ steps.status.outputs.status_text }}** ${{ steps.status.outputs.status_emoji }}
>
> **Docker image (pre-release):**
> `${{ steps.message-vars.outputs.docker_image }}:${{ steps.message-vars.outputs.docker_tag }}`
>
> **Docker Hub:** ${{ steps.message-vars.outputs.dockerhub_url }}
>
> **Registry JSON:**
> - [OSS Registry](${{ steps.message-vars.outputs.oss_registry_url }})
> - [Cloud Registry](${{ steps.message-vars.outputs.cloud_registry_url }})

View File

@@ -21,6 +21,10 @@ on:
required: false
default: false
type: boolean
outputs:
docker-image-tag:
description: "Docker image tag used when publishing. For single-connector callers only; multi-connector callers should not rely on this output."
value: ${{ jobs.publish_connector_registry_entries.outputs.docker-image-tag }}
workflow_dispatch:
inputs:
connectors:
@@ -250,6 +254,8 @@ jobs:
max-parallel: 5
# Allow all jobs to run, even if one fails
fail-fast: false
outputs:
docker-image-tag: ${{ steps.connector-metadata.outputs.docker-image-tag }}
steps:
- name: Checkout Airbyte
# v4

View File

@@ -1,167 +0,0 @@
name: Connector Ops CI - Run Regression Tests
concurrency:
# This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
#
# - github.head_ref is only defined on PR runs, it makes sure that the concurrency group is unique for pull requests
# ensuring that only one run per pull request is active at a time.
#
# - github.run_id is defined on all runs, it makes sure that the concurrency group is unique for workflow dispatches.
# This allows us to run multiple workflow dispatches in parallel.
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
on:
workflow_dispatch:
inputs:
connector_name:
description: Connector name (e.g. source-faker)
required: true
connection_id:
description: ID of the connection to test; use "auto" to let the connection retriever choose a connection
required: true
default: auto
pr_url:
description: URL of the PR containing the code change
required: true
streams:
description: Streams to include in regression tests
should_read_with_state:
description: Whether to run tests against the read command with state
default: "true"
type: boolean
use_local_cdk:
description: Use the local CDK when building the target connector
default: "false"
type: boolean
disable_proxy:
description: Disable proxy for requests
default: "false"
type: boolean
connection_subset:
description: The subset of connections to select from.
required: true
type: choice
default: all
options:
- sandboxes
- all
control_version:
description: The version to use as a control version. This is useful when the version defined in the cloud registry does not have a lot of usage (either because a progressive rollout is underway or because a new version has just been released).
required: false
type: string
jobs:
regression_tests:
name: Regression Tests
runs-on: linux-24.04-large # Custom runner, defined in GitHub org settings
timeout-minutes: 360 # 6 hours
steps:
- name: Install Python
id: install_python
uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
with:
python-version: "3.11"
check-latest: true
update-environment: true
- name: Checkout Airbyte
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Extract branch name [WORKFLOW DISPATCH]
shell: bash
if: github.event_name == 'workflow_dispatch'
run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT
id: extract_branch
- name: Install Poetry
id: install_poetry
uses: snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a # v1.4.1
with:
version: 1.8.5
- name: Make poetry venv in project
id: poetry_venv
run: poetry config virtualenvs.in-project true
- name: Install Python packages
id: install_python_packages
working-directory: airbyte-ci/connectors/pipelines
run: poetry install
- name: Fetch last commit id from remote branch [WORKFLOW DISPATCH]
if: github.event_name == 'workflow_dispatch'
id: fetch_last_commit_id_wd
run: echo "commit_id=$(git rev-parse origin/${{ steps.extract_branch.outputs.branch }})" >> $GITHUB_OUTPUT
- name: Setup Stream Parameters
if: github.event_name == 'workflow_dispatch'
run: |
if [ -z "${{ github.event.inputs.streams }}" ]; then
echo "STREAM_PARAMS=" >> $GITHUB_ENV
else
STREAMS=$(echo "${{ github.event.inputs.streams }}" | sed 's/,/ --connector_live_tests.selected-streams=/g')
echo "STREAM_PARAMS=--connector_live_tests.selected-streams=$STREAMS" >> $GITHUB_ENV
fi
- name: Setup Local CDK Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.use_local_cdk }}; then
echo "USE_LOCAL_CDK_FLAG=--use-local-cdk" >> $GITHUB_ENV
else
echo "USE_LOCAL_CDK_FLAG=" >> $GITHUB_ENV
fi
- name: Setup State Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.should_read_with_state }}; then
echo "READ_WITH_STATE_FLAG=--connector_live_tests.should-read-with-state" >> $GITHUB_ENV
else
echo "READ_WITH_STATE_FLAG=" >> $GITHUB_ENV
fi
- name: Setup Proxy Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.disable_proxy }}; then
echo "DISABLE_PROXY_FLAG=--connector_live_tests.disable-proxy" >> $GITHUB_ENV
else
echo "DISABLE_PROXY_FLAG=" >> $GITHUB_ENV
fi
- name: Setup Connection Subset Option
if: github.event_name == 'workflow_dispatch'
run: |
echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
- name: Setup Control Version
if: github.event_name == 'workflow_dispatch'
run: |
if [ -n "${{ github.event.inputs.control_version }}" ]; then
echo "CONTROL_VERSION=--connector_live_tests.control-version=${{ github.event.inputs.control_version }}" >> $GITHUB_ENV
else
echo "CONTROL_VERSION=" >> $GITHUB_ENV
fi
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
# permissions to add commits to our main repo as well as forks. This will only work on
# forks if the user installs the app into their fork. Until we document this as a clear
# path, we will have to keep using the PAT.
- name: Run Regression Tests [WORKFLOW DISPATCH]
if: github.event_name == 'workflow_dispatch' # TODO: consider using the matrix strategy (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs). See https://github.com/airbytehq/airbyte/pull/37659#discussion_r1583380234 for details.
uses: ./.github/actions/run-airbyte-ci
with:
context: "manual"
dagger_cloud_token: ${{ secrets.DAGGER_CLOUD_TOKEN_CACHE_3 }}
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
gcp_integration_tester_credentials: ${{ secrets.GCLOUD_INTEGRATION_TESTER }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
git_branch: ${{ steps.extract_branch.outputs.branch }}
git_revision: ${{ steps.fetch_last_commit_id_pr.outputs.commit_id }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} --name ${{ github.event.inputs.connector_name }} test --only-step connector_live_tests --connector_live_tests.test-suite=regression --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url=${{ github.event.inputs.pr_url }} ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }} ${{ env.CONTROL_VERSION }} --global-status-check-context="Regression Tests" --global-status-check-description='Running regression tests'

View File

@@ -32,9 +32,10 @@ jobs:
with:
fetch-depth: 0
- uses: errata-ai/vale-action@d89dee975228ae261d22c15adcd03578634d429c # Pinned to V2.1.1
continue-on-error: true # Always pass, even if reviewdog can't post annotations (e.g., fork PRs with read-only tokens)
with:
vale_flags: --config=docusaurus/vale-ci.ini --minAlertLevel=warning # CI-specific config that disables certain rules (see vale-ci.ini vs vale.ini)
vale_flags: --config=docusaurus/vale.ini --minAlertLevel=warning # Use vale.ini with minAlertLevel overridden to warning for CI
files: docs/ # Folder in which to lint
filter_mode: added # Only lint things that have changed
fail_on_error: false # Don't fail if the linter finds issues (compliance is optional)
reporter: github-pr-review # Post as annotations on the Changed Files page
reporter: local # Output to job logs only, no PR annotations or comments

View File

@@ -1,4 +1,4 @@
name: Connector CI - Run Live Validation Tests
name: On-Demand Live Connector Validation Tests
concurrency:
# This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
@@ -14,17 +14,44 @@ concurrency:
on:
workflow_dispatch:
inputs:
connector_name:
description: Connector name (e.g. source-faker)
required: true
# Global static-arg inputs for slash commands
repo:
description: "The repository name. Optional. Defaults to 'airbytehq/airbyte'."
required: false
default: "airbytehq/airbyte"
type: string
gitref:
description: "The git reference (branch or tag). Optional. Defaults to the default branch."
required: false
type: string
comment-id:
description: "The ID of the comment triggering the workflow. Optional."
required: false
type: number
pr:
description: "The pull request number, if applicable. Optional."
required: false
type: number
# Workflow-specific inputs
connector_filter:
description: >
Connector filter. Will be passed to the `airbyte-ci connectors` command.
To select all modified connectors, use '--modified'. To select specific connectors,
pass one or more `--name` args, e.g. '--name=source-faker --name=source-hardcoded-records'.
default: "--modified"
connection_id:
description: ID of the connection to test; use "auto" to let the connection retriever choose a connection
required: true
pr_url:
description: URL of the PR containing the code change
required: true
description: >
Connection ID. ID of the connection to test; use "auto" to let the
connection retriever choose a connection.
default: "auto"
streams:
description: Streams to include in tests
description: >
(Optional) Streams. Which streams to include in tests.
If not set, these will be chosen automatically.
required: false
default: ""
type: string
should_read_with_state:
description: Whether to run tests against the read command with state
default: "true"
@@ -37,13 +64,16 @@ on:
description: Disable proxy for requests
default: "false"
type: boolean
connection_subset:
description: The subset of connections to select from.
required: true
type: choice
options:
- sandboxes
- all
# Workaround: GitHub currently supports a max of 10 inputs for workflow_dispatch events.
# We need to consolidate some inputs to stay within this limit.
# connection_subset:
# description: The subset of connections to select from.
# default: "sandboxes"
# type: choice
# options:
# - sandboxes
# - all
jobs:
live_tests:
@@ -119,7 +149,10 @@ jobs:
- name: Setup Connection Subset Option
if: github.event_name == 'workflow_dispatch'
run: |
echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=sandboxes" >> $GITHUB_ENV
# TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
# run: |
# echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
# permissions to add commits to our main repo as well as forks. This will only work on
@@ -141,4 +174,4 @@ jobs:
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} --name ${{ github.event.inputs.connector_name }} test --only-step connector_live_tests --connector_live_tests.test-suite=live --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url=${{ github.event.inputs.pr_url }} ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }}
subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} ${{ inputs.connector_filter }} test --only-step connector_live_tests --connector_live_tests.test-suite=live --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url="https://github.com/airbytehq/airbyte/pull/${{ github.event.inputs.pr }}" ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }}

View File

@@ -0,0 +1,315 @@
name: On-Demand Connector Regression Tests
concurrency:
# This is the name of the concurrency group. It is used to prevent concurrent runs of the same workflow.
#
# - github.head_ref is only defined on PR runs, it makes sure that the concurrency group is unique for pull requests
# ensuring that only one run per pull request is active at a time.
#
# - github.run_id is defined on all runs, it makes sure that the concurrency group is unique for workflow dispatches.
# This allows us to run multiple workflow dispatches in parallel.
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
on:
workflow_dispatch:
inputs:
# Global static-arg inputs for slash commands
repo:
description: "The repository name"
required: false
default: "airbytehq/airbyte"
type: string
gitref:
description: "The git reference (branch or tag)"
required: false
type: string
comment-id:
description: "The ID of the comment triggering the workflow"
required: false
type: number
pr:
description: "The pull request number, if applicable"
required: false
type: number
# Workflow-specific inputs
connector_filter:
description: >
Connector filter. Will be passed to the `airbyte-ci connectors` command.
To select all modified connectors, use '--modified'. To select specific connectors,
pass one or more `--name` args, e.g. '--name=source-faker --name=source-hardcoded-records'.
default: "--modified"
connection_id:
description: >
Connection ID. ID of the connection to test; use "auto" to let the
connection retriever choose a connection.
default: "auto"
streams:
description: >
(Optional) Streams. Which streams to include in tests.
If not set, these will be chosen automatically.
required: false
default: ""
type: string
should_read_with_state:
description: Whether to run tests against the read command with state
default: "true"
type: boolean
use_local_cdk:
description: Use the local CDK when building the target connector
default: "false"
type: boolean
disable_proxy:
description: Disable proxy for requests
default: "false"
type: boolean
# Workaround: GitHub currently supports a max of 10 inputs for workflow_dispatch events.
# We need to consolidate some inputs to stay within this limit.
# connection_subset:
# description: The subset of connections to select from.
# default: "sandboxes"
# type: choice
# options:
# - sandboxes
# - all
# control_version:
# description: The version to use as a control version. This is useful when the version defined in the cloud registry does not have a lot of usage (either because a progressive rollout is underway or because a new version has just been released).
# required: false
# type: string
jobs:
regression_tests:
name: Regression Tests
runs-on: linux-24.04-large # Custom runner, defined in GitHub org settings
timeout-minutes: 360 # 6 hours
permissions:
contents: read
pull-requests: write
issues: write
steps:
- name: Append start comment with run link
id: pr-comment-id
if: github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ github.token }}
issue-number: ${{ github.event.inputs.pr }}
comment-id: ${{ github.event.inputs.comment-id }}
edit-mode: append
body: |
> Starting regression tests (filter: `${{ github.event.inputs.connector_filter || '--modified' }}`)
> Workflow run: [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
- name: Install Python
id: install_python
uses: actions/setup-python@7f4fc3e22c37d6ff65e88745f38bd3157c663f7c # v4.9.1
with:
python-version: "3.11"
check-latest: true
update-environment: true
- name: Checkout Airbyte
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Extract branch name [WORKFLOW DISPATCH]
shell: bash
if: github.event_name == 'workflow_dispatch'
run: echo "branch=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT
id: extract_branch
- name: Install Poetry
id: install_poetry
uses: snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a # v1.4.1
with:
version: 1.8.5
- name: Make poetry venv in project
id: poetry_venv
run: poetry config virtualenvs.in-project true
- name: Install Python packages
id: install_python_packages
working-directory: airbyte-ci/connectors/pipelines
run: poetry install
- name: Fetch last commit id from remote branch [WORKFLOW DISPATCH]
if: github.event_name == 'workflow_dispatch'
id: fetch_last_commit_id_wd
run: echo "commit_id=$(git rev-parse origin/${{ steps.extract_branch.outputs.branch }})" >> $GITHUB_OUTPUT
- name: Setup Stream Parameters
if: github.event_name == 'workflow_dispatch'
run: |
if [ -z "${{ github.event.inputs.streams }}" ]; then
echo "STREAM_PARAMS=" >> $GITHUB_ENV
else
STREAMS=$(echo "${{ github.event.inputs.streams }}" | sed 's/,/ --connector_live_tests.selected-streams=/g')
echo "STREAM_PARAMS=--connector_live_tests.selected-streams=$STREAMS" >> $GITHUB_ENV
fi
- name: Setup Local CDK Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.use_local_cdk }}; then
echo "USE_LOCAL_CDK_FLAG=--use-local-cdk" >> $GITHUB_ENV
else
echo "USE_LOCAL_CDK_FLAG=" >> $GITHUB_ENV
fi
- name: Setup State Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.should_read_with_state }}; then
echo "READ_WITH_STATE_FLAG=--connector_live_tests.should-read-with-state" >> $GITHUB_ENV
else
echo "READ_WITH_STATE_FLAG=" >> $GITHUB_ENV
fi
- name: Setup Proxy Flag
if: github.event_name == 'workflow_dispatch'
run: |
if ${{ github.event.inputs.disable_proxy }}; then
echo "DISABLE_PROXY_FLAG=--connector_live_tests.disable-proxy" >> $GITHUB_ENV
else
echo "DISABLE_PROXY_FLAG=" >> $GITHUB_ENV
fi
- name: Setup Connection Subset Option
if: github.event_name == 'workflow_dispatch'
run: |
echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=sandboxes" >> $GITHUB_ENV
# TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
# run: |
# echo "CONNECTION_SUBSET=--connector_live_tests.connection-subset=${{ github.event.inputs.connection_subset }}" >> $GITHUB_ENV
- name: Setup Control Version
if: github.event_name == 'workflow_dispatch'
run: |
echo "CONTROL_VERSION=" >> $GITHUB_ENV
# TODO: re-enable when we have resolved the more-than-10-inputs issue in workflow_dispatch.
# run: |
# if [ -n "${{ github.event.inputs.control_version }}" ]; then
# echo "CONTROL_VERSION=--connector_live_tests.control-version=${{ github.event.inputs.control_version }}" >> $GITHUB_ENV
# else
# echo "CONTROL_VERSION=" >> $GITHUB_ENV
# fi
# NOTE: We still use a PAT here (rather than a GitHub App) because the workflow needs
# permissions to add commits to our main repo as well as forks. This will only work on
# forks if the user installs the app into their fork. Until we document this as a clear
# path, we will have to keep using the PAT.
- name: Run Regression Tests [WORKFLOW DISPATCH]
id: run-regression-tests
if: github.event_name == 'workflow_dispatch' # TODO: consider using the matrix strategy (https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs). See https://github.com/airbytehq/airbyte/pull/37659#discussion_r1583380234 for details.
uses: ./.github/actions/run-airbyte-ci
with:
context: "manual"
dagger_cloud_token: ${{ secrets.DAGGER_CLOUD_TOKEN_CACHE_3 }}
docker_hub_password: ${{ secrets.DOCKER_HUB_PASSWORD }}
docker_hub_username: ${{ secrets.DOCKER_HUB_USERNAME }}
gcp_gsm_credentials: ${{ secrets.GCP_GSM_CREDENTIALS }}
gcp_integration_tester_credentials: ${{ secrets.GCLOUD_INTEGRATION_TESTER }}
sentry_dsn: ${{ secrets.SENTRY_AIRBYTE_CI_DSN }}
git_branch: ${{ steps.extract_branch.outputs.branch }}
git_revision: ${{ steps.fetch_last_commit_id_wd.outputs.commit_id }}
github_token: ${{ secrets.GH_PAT_MAINTENANCE_OSS }}
s3_build_cache_access_key_id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
s3_build_cache_secret_key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
subcommand: connectors ${{ env.USE_LOCAL_CDK_FLAG }} ${{ inputs.connector_filter }} test --only-step connector_live_tests --connector_live_tests.test-suite=regression --connector_live_tests.connection-id=${{ github.event.inputs.connection_id }} --connector_live_tests.pr-url="https://github.com/airbytehq/airbyte/pull/${{ github.event.inputs.pr }}" ${{ env.READ_WITH_STATE_FLAG }} ${{ env.DISABLE_PROXY_FLAG }} ${{ env.STREAM_PARAMS }} ${{ env.CONNECTION_SUBSET }} ${{ env.CONTROL_VERSION }} --global-status-check-context="Regression Tests" --global-status-check-description='Running regression tests'
- name: Locate regression test report
if: always() && github.event_name == 'workflow_dispatch'
id: locate-report
run: |
# Find the most recent report.html file in /tmp/live_tests_artifacts/
REPORT_PATH=$(find /tmp/live_tests_artifacts -name "report.html" -type f -printf '%T@ %p\n' 2>/dev/null | sort -n | tail -1 | cut -f2- -d" ")
if [ -n "$REPORT_PATH" ]; then
echo "report_path=$REPORT_PATH" >> "$GITHUB_OUTPUT"
echo "Found report at: $REPORT_PATH"
else
echo "report_path=" >> "$GITHUB_OUTPUT"
echo "No report.html found in /tmp/live_tests_artifacts/"
fi
- name: Upload regression test report
if: always() && github.event_name == 'workflow_dispatch' && steps.locate-report.outputs.report_path != ''
uses: actions/upload-artifact@v4
with:
name: regression-test-report
path: ${{ steps.locate-report.outputs.report_path }}
if-no-files-found: ignore
- name: Append regression outcome
if: always() && github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ github.token }}
comment-id: ${{ steps.pr-comment-id.outputs.comment-id }}
edit-mode: append
body: |
> Regression tests: ${{ steps.run-regression-tests.outcome == 'success' && '✅ PASSED' || steps.run-regression-tests.outcome == 'failure' && '❌ FAILED' || steps.run-regression-tests.outcome == 'cancelled' && '⚠️ CANCELLED' || steps.run-regression-tests.outcome == 'skipped' && '⏭️ SKIPPED' || '❓ UNKNOWN' }}
> Report: ${{ steps.locate-report.outputs.report_path != '' && 'artifact `regression-test-report` available in the run' || 'not generated' }}
- name: Install live-tests dependencies for LLM evaluation
if: always() && github.event_name == 'workflow_dispatch'
working-directory: airbyte-ci/connectors/live-tests
run: poetry install
- name: Install and Start Ollama
if: always() && github.event_name == 'workflow_dispatch'
run: |
curl -fsSL https://ollama.com/install.sh | sh
ollama serve &
sleep 5
ollama pull llama3.2:3b
echo "Ollama server started and model pulled"
- name: Evaluate Regression Test Report with LLM
if: always() && github.event_name == 'workflow_dispatch' && steps.locate-report.outputs.report_path != ''
id: llm-eval
continue-on-error: true
working-directory: airbyte-ci/connectors/live-tests
env:
OPENAI_API_KEY: ollama
OPENAI_BASE_URL: http://127.0.0.1:11434/v1
EVAL_MODEL: llama3.2:3b
run: |
set -u
echo "ran=false" >> "$GITHUB_OUTPUT"
echo "result=error" >> "$GITHUB_OUTPUT"
REPORT_PATH="${{ steps.locate-report.outputs.report_path }}"
if [ -z "$REPORT_PATH" ]; then
echo "Error: No report path provided from locate-report step" >&2
echo "## ⚠️ LLM Evaluation Skipped" >> "$GITHUB_STEP_SUMMARY"
echo "No regression test report found. The tests may have failed to generate a report." >> "$GITHUB_STEP_SUMMARY"
exit 1
fi
echo "Evaluating report at: $REPORT_PATH"
# Run the evaluation script
OUT_JSON="$RUNNER_TEMP/llm_eval.json"
poetry run python src/live_tests/regression_tests/llm_evaluation/evaluate_report.py \
--report-path "$REPORT_PATH" \
--output-json "$OUT_JSON"
# If we got here, the script exited 0 and produced a judgment
PASS=$(jq -r '.evaluation.pass' "$OUT_JSON")
if [ "$PASS" = "true" ]; then RES="pass"; else RES="fail"; fi
echo "ran=true" >> "$GITHUB_OUTPUT"
echo "result=$RES" >> "$GITHUB_OUTPUT"
- name: Append LLM outcome
if: always() && github.event_name == 'workflow_dispatch' && github.event.inputs.pr != ''
env:
EVAL_MODEL: llama3.2:3b
uses: peter-evans/create-or-update-comment@v4
with:
token: ${{ github.token }}
comment-id: ${{ steps.pr-comment-id.outputs.comment-id }}
edit-mode: append
body: |
> LLM Evaluation: ${{ steps.llm-eval.outputs.ran == 'true' && (steps.llm-eval.outputs.result == 'pass' && '✅ PASS' || steps.llm-eval.outputs.result == 'fail' && '❌ FAIL' || '⚠️ ERROR') || '⚠️ Did not run' }}${{ steps.llm-eval.outputs.ran == 'true' && format(' (model: {0})', env.EVAL_MODEL) || '' }}
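The "Setup Stream Parameters" step above expands a comma-separated `streams` input into repeated `--connector_live_tests.selected-streams=` flags. Here is a standalone sketch of that expansion, using hypothetical stream names:

```bash
# Standalone illustration of the "Setup Stream Parameters" step above.
streams="users,orders,issues"   # hypothetical comma-separated input
if [ -z "$streams" ]; then
  STREAM_PARAMS=""
else
  # Replace each comma with a repeated flag, then prefix the first stream's flag.
  STREAMS=$(echo "$streams" | sed 's/,/ --connector_live_tests.selected-streams=/g')
  STREAM_PARAMS="--connector_live_tests.selected-streams=$STREAMS"
fi
echo "$STREAM_PARAMS"
# Prints:
# --connector_live_tests.selected-streams=users --connector_live_tests.selected-streams=orders --connector_live_tests.selected-streams=issues
```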

View File

@@ -35,16 +35,23 @@ jobs:
issue-type: both
commands: |
ai-canary-prerelease
ai-prove-fix
ai-release-watch
approve-regression-tests
bump-bulk-cdk-version
bump-progressive-rollout-version
bump-version
build-connector-images
connector-performance
format-fix
poe
publish-connectors-prerelease
publish-java-cdk
run-cat-tests
run-connector-tests
run-live-tests
run-regression-tests
test-performance
update-connector-cdk-version

View File

@@ -0,0 +1,66 @@
# Contributing to the Kotlin Bulk CDK
Thank you for your interest in contributing to the Airbyte Kotlin Bulk CDK!
## Prerequisites
- **JDK 21** (Java Development Kit) or higher
- **Gradle** (uses the wrapper, no separate installation needed)
### If you need to install Java
```bash
# Get sdkman (https://sdkman.io/)
curl -s "https://get.sdkman.io" | bash
source "$HOME/.sdkman/bin/sdkman-init.sh"
# Verify install
sdk version
# Show available versions
sdk list java | grep 21
# Install the latest and set as default
sdk install java 21.0.9-zulu
sdk default java 21.0.9-zulu
```
## Generating Documentation
The Kotlin Bulk CDK uses [Dokka](https://kotlinlang.org/docs/dokka-introduction.html) to generate API documentation from KDoc comments.
**Published Documentation**: The latest API documentation is available at https://airbyte-kotlin-cdk.vercel.app/
### Generate Documentation Locally
```bash
./gradlew :airbyte-cdk:bulk:docsGenerate
```
This generates HTML documentation in `airbyte-cdk/bulk/build/dokka/htmlMultiModule/`.
### View Generated Documentation
```bash
# macOS
open airbyte-cdk/bulk/build/dokka/htmlMultiModule/index.html
# Linux
xdg-open airbyte-cdk/bulk/build/dokka/htmlMultiModule/index.html
```
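If you prefer browsing the generated docs over a local web server rather than `file://` URLs, one option (assuming Python 3 is installed) is:

```bash
# Serve the generated documentation locally, then open http://localhost:8000/
cd airbyte-cdk/bulk/build/dokka/htmlMultiModule
python3 -m http.server 8000
```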
## Other Useful Commands
```bash
# Build all modules
./gradlew :airbyte-cdk:bulk:bulkCdkBuild
# Run tests
./gradlew :airbyte-cdk:bulk:test
```
## More Information
For architecture, publishing, development workflow, and other details, see the [README](README.md).
For general Airbyte contribution guidelines, see the [main contributing guide](../../docs/contributing-to-airbyte/README.md).

View File

@@ -4,6 +4,9 @@ The Bulk CDK is the "new java CDK" that's currently incubating.
As the name suggests, its purpose is to help develop connectors which extract or load data in bulk.
The Bulk CDK is written in Kotlin and uses the Micronaut framework for dependency injection.
- **API Reference Docs**: [Kotlin CDK API Reference](https://airbyte-kotlin-cdk.vercel.app/)
- **Contributing**: See [CONTRIBUTING.md](CONTRIBUTING.md).
## Structure
The Bulk CDK consists of a _core_ and a bunch of _toolkits_.

View File

@@ -9,6 +9,10 @@ import org.gradle.api.tasks.TaskAction
import org.gradle.api.tasks.options.Option
import org.w3c.dom.Document
plugins {
id 'org.jetbrains.dokka' version '2.0.0'
}
final var versionFile = file("version.properties")
final var cdkVersion = {
@@ -22,6 +26,7 @@ allprojects {
version = cdkVersion
apply plugin: 'java-library'
apply plugin: 'maven-publish'
apply plugin: 'org.jetbrains.dokka'
group 'io.airbyte.bulk-cdk'
@@ -79,6 +84,67 @@ allprojects {
}
}
// Configure Dokka for all subprojects
subprojects {
tasks.withType(org.jetbrains.dokka.gradle.DokkaTask.class) {
dokkaSourceSets {
configureEach {
// Only document public APIs
includeNonPublic.set(false)
skipEmptyPackages.set(true)
// Report undocumented members
reportUndocumented.set(true)
// Add external documentation links
externalDocumentationLinks {
create("kotlin") {
url.set(uri("https://kotlinlang.org/api/latest/jvm/stdlib/").toURL())
packageListUrl.set(uri("https://kotlinlang.org/api/latest/jvm/stdlib/package-list").toURL())
}
create("kotlinx-coroutines") {
url.set(uri("https://kotlinlang.org/api/kotlinx.coroutines/").toURL())
}
create("micronaut") {
url.set(uri("https://docs.micronaut.io/latest/api/").toURL())
}
}
// Source links back to GitHub
sourceLink {
localDirectory.set(file("src/main"))
remoteUrl.set(uri("https://github.com/airbytehq/airbyte/tree/master/airbyte-cdk/bulk/${project.name}/src/main").toURL())
remoteLineSuffix.set("#L")
}
}
}
}
}
// Configure the multi-module documentation task
tasks.named('dokkaHtmlMultiModule') {
moduleName.set("Airbyte Kotlin Bulk CDK")
outputDirectory.set(layout.buildDirectory.dir("dokka/htmlMultiModule"))
}
// Convenience task for local development
tasks.register('docsGenerate') {
group = 'documentation'
description = 'Generate Dokka documentation for all modules'
dependsOn 'dokkaHtmlMultiModule'
doLast {
println "Documentation generated at: ${layout.buildDirectory.dir("dokka/htmlMultiModule").get()}"
}
}
// Backwards-compatible alias
tasks.register('dokkaGenerate') {
group = 'documentation'
description = 'Generate Dokka documentation for all modules (alias for docsGenerate)'
dependsOn 'docsGenerate'
}
tasks.register('checkBuildNumber') {
description = "Check that the version doesn't exist"

View File

@@ -1,3 +1,50 @@
## Version 0.1.88
**Load CDK**
* Add CDC_CURSOR_COLUMN_NAME constant.
## Version 0.1.87
**Load CDK**
* Properly call NamespaceMapper before calculating final table names.
## Version 0.1.86
**Load CDK**
* Adds toFinalSchema "escape hatch" for final table schema munging
* Refactored Component test fixtures to require explicit StreamTableSchema creation using TableSchemaFactory
## Version 0.1.85
**Extract CDK**
* Fix CDC partition reader race condition when draining records after debezium shutdown.
## Version 0.1.84
load cdk: Move most DB packages into core. Refactor table schema interface into TableSchemaMapper.
## Version 0.1.83
load cdk: more tests to help guide dependency injection implementations
## Version 0.1.82
load cdk: components tests: more schema evolution testcases
## Version 0.1.81
load cdk: components tests: more coverage on upsert
## Version 0.1.80
**Extract CDK**
* Fix default partition_id value for `CheckpointOnlyPartitionReader`.
## Version 0.1.79
**Extract CDK**

View File

@@ -13,6 +13,7 @@ kotlin {
dependencies {
api("com.github.f4b6a3:uuid-creator:6.1.1")
implementation 'commons-codec:commons-codec:1.16.0'
implementation project(':airbyte-cdk:bulk:core:bulk-cdk-core-base')
implementation 'org.apache.commons:commons-lang3:3.17.0'

View File

@@ -165,6 +165,7 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val e =
assertThrows<DestinationUncleanExitException> {
@@ -202,6 +203,7 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val returnedMessages =
@@ -324,7 +326,8 @@ abstract class BaseMockBasicFunctionalityIntegrationTest(
namespaceDefinitionType = namespaceMappingConfig.namespaceDefinitionType,
streamPrefix = namespaceMappingConfig.streamPrefix,
namespaceFormat = namespaceMappingConfig.namespaceFormat
)
),
tableSchema = emptyTableSchema,
)
namespaceValidator(
stream.unmappedNamespace,

View File

@@ -12,6 +12,10 @@ import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.message.DestinationRecordStreamComplete
import io.airbyte.cdk.load.message.InputRecord
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.util.serializeToString
import io.airbyte.cdk.load.write.WriteOperation
import io.github.oshai.kotlinlogging.KotlinLogging
@@ -46,7 +50,18 @@ interface DestinationChecker<C : DestinationConfiguration> {
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames = TableNames(finalTableName = TableName("testing", "test")),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
fun check(config: C)

View File

@@ -10,10 +10,15 @@ import io.airbyte.cdk.load.config.CHECK_STREAM_NAMESPACE
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.schema.TableNameResolver
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Value
import io.micronaut.context.annotation.Requires
import jakarta.inject.Named
import jakarta.inject.Singleton
import java.time.LocalDate
@@ -91,45 +96,81 @@ data class DestinationCatalog(val streams: List<DestinationStream> = emptyList()
}
}
interface DestinationCatalogFactory {
fun make(): DestinationCatalog
}
@Factory
class DefaultDestinationCatalogFactory {
@Requires(property = Operation.PROPERTY, notEquals = "check")
@Singleton
fun getDestinationCatalog(
fun syncCatalog(
catalog: ConfiguredAirbyteCatalog,
streamFactory: DestinationStreamFactory,
@Value("\${${Operation.PROPERTY}}") operation: String,
tableNameResolver: TableNameResolver,
namespaceMapper: NamespaceMapper,
): DestinationCatalog {
// we resolve the table names with the properly mapped descriptors
val mappedDescriptors =
catalog.streams.map { namespaceMapper.map(it.stream.namespace, it.stream.name) }.toSet()
val names = tableNameResolver.getTableNameMapping(mappedDescriptors)
require(
names.size == catalog.streams.size,
{ "Invariant violation: An incomplete table name mapping was generated." }
)
return DestinationCatalog(
streams =
catalog.streams.map {
val key = namespaceMapper.map(it.stream.namespace, it.stream.name)
streamFactory.make(it, names[key]!!)
}
)
}
/**
* Warning: Most destinations do not use this.
*
* Catalog stub for running SYNC from within a CHECK operation.
*
* Used exclusively by the DefaultDestinationChecker.
*/
@Requires(property = Operation.PROPERTY, value = "check")
@Singleton
fun checkCatalog(
@Named("checkNamespace") checkNamespace: String?,
namespaceMapper: NamespaceMapper
): DestinationCatalog {
if (operation == "check") {
// generate a string like "20240523"
val date = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"))
// generate 5 random characters
val random = RandomStringUtils.insecure().nextAlphabetic(5).lowercase()
val namespace = checkNamespace ?: "${CHECK_STREAM_NAMESPACE}_$date$random"
return DestinationCatalog(
listOf(
DestinationStream(
unmappedNamespace = namespace,
unmappedName = "test$date$random",
importType = Append,
schema =
ObjectType(
linkedMapOf("test" to FieldType(IntegerType, nullable = true))
),
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
namespaceMapper = namespaceMapper
)
// generate a string like "20240523"
val date = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"))
// generate 5 random characters
val random = RandomStringUtils.insecure().nextAlphabetic(5).lowercase()
val namespace = checkNamespace ?: "${CHECK_STREAM_NAMESPACE}_$date$random"
return DestinationCatalog(
listOf(
DestinationStream(
unmappedNamespace = namespace,
unmappedName = "test$date$random",
importType = Append,
schema =
ObjectType(linkedMapOf("test" to FieldType(IntegerType, nullable = true))),
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
namespaceMapper = namespaceMapper,
tableSchema =
StreamTableSchema(
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf()
),
importType = Append,
tableNames =
TableNames(
finalTableName = TableName("namespace", "test"),
),
),
)
)
} else {
return DestinationCatalog(streams = catalog.streams.map { streamFactory.make(it) })
}
)
}
}

View File

@@ -9,15 +9,17 @@ import io.airbyte.cdk.load.data.AirbyteValueProxy
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.collectUnknownPaths
import io.airbyte.cdk.load.data.json.AirbyteTypeToJsonSchema
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.message.DestinationRecord
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.StreamDescriptor
import jakarta.inject.Singleton
import io.github.oshai.kotlinlogging.KotlinLogging
private val log = KotlinLogging.logger {}
/**
* Internal representation of destination streams. This is intended to be a case class specialized
@@ -64,7 +66,8 @@ data class DestinationStream(
val includeFiles: Boolean = false,
val destinationObjectName: String? = null,
val matchingKey: List<String>? = null,
private val namespaceMapper: NamespaceMapper
private val namespaceMapper: NamespaceMapper,
val tableSchema: StreamTableSchema,
) {
val unmappedDescriptor = Descriptor(namespace = unmappedNamespace, name = unmappedName)
val mappedDescriptor = namespaceMapper.map(namespace = unmappedNamespace, name = unmappedName)
@@ -181,58 +184,6 @@ fun AirbyteType.computeUnknownColumnChanges() =
)
}
@Singleton
class DestinationStreamFactory(
private val jsonSchemaToAirbyteType: JsonSchemaToAirbyteType,
private val namespaceMapper: NamespaceMapper
) {
fun make(stream: ConfiguredAirbyteStream): DestinationStream {
return DestinationStream(
unmappedNamespace = stream.stream.namespace,
unmappedName = stream.stream.name,
namespaceMapper = namespaceMapper,
importType =
when (stream.destinationSyncMode) {
null -> throw IllegalArgumentException("Destination sync mode was null")
DestinationSyncMode.APPEND -> Append
DestinationSyncMode.OVERWRITE -> Overwrite
DestinationSyncMode.APPEND_DEDUP ->
Dedupe(primaryKey = stream.primaryKey, cursor = stream.cursorField)
DestinationSyncMode.UPDATE -> Update
DestinationSyncMode.SOFT_DELETE -> SoftDelete
},
generationId = stream.generationId,
minimumGenerationId = stream.minimumGenerationId,
syncId = stream.syncId,
schema = jsonSchemaToAirbyteType.convert(stream.stream.jsonSchema),
isFileBased = stream.stream.isFileBased ?: false,
includeFiles = stream.includeFiles ?: false,
destinationObjectName = stream.destinationObjectName,
matchingKey =
stream.destinationObjectName?.let {
fromCompositeNestedKeyToCompositeKey(stream.primaryKey)
}
)
}
}
private fun fromCompositeNestedKeyToCompositeKey(
compositeNestedKey: List<List<String>>
): List<String> {
if (compositeNestedKey.any { it.size > 1 }) {
throw IllegalArgumentException(
"Nested keys are not supported for matching keys. Key was $compositeNestedKey"
)
}
if (compositeNestedKey.any { it.isEmpty() }) {
throw IllegalArgumentException(
"Parts of the composite key need to have at least one element. Key was $compositeNestedKey"
)
}
return compositeNestedKey.map { it[0] }.toList()
}
sealed interface ImportType
data object Append : ImportType

View File

@@ -0,0 +1,89 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.command
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectTypeWithEmptySchema
import io.airbyte.cdk.load.data.ObjectTypeWithoutSchema
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
private val log = KotlinLogging.logger {}
@Singleton
class DestinationStreamFactory(
private val jsonSchemaToAirbyteType: JsonSchemaToAirbyteType,
private val namespaceMapper: NamespaceMapper,
private val schemaFactory: TableSchemaFactory,
) {
fun make(stream: ConfiguredAirbyteStream, resolvedTableName: TableName): DestinationStream {
val airbyteSchemaType = jsonSchemaToAirbyteType.convert(stream.stream.jsonSchema)
val airbyteSchema: Map<String, FieldType> =
when (airbyteSchemaType) {
is ObjectType -> airbyteSchemaType.properties
is ObjectTypeWithEmptySchema,
is ObjectTypeWithoutSchema -> emptyMap()
else -> throw IllegalStateException("Unsupported top-level schema type: $airbyteSchemaType")
}
val importType =
when (stream.destinationSyncMode) {
null -> throw IllegalArgumentException("Destination sync mode was null")
DestinationSyncMode.APPEND -> Append
DestinationSyncMode.OVERWRITE -> Overwrite
DestinationSyncMode.APPEND_DEDUP ->
Dedupe(primaryKey = stream.primaryKey, cursor = stream.cursorField)
DestinationSyncMode.UPDATE -> Update
DestinationSyncMode.SOFT_DELETE -> SoftDelete
}
val tableSchema =
schemaFactory.make(
resolvedTableName,
airbyteSchema,
importType,
)
return DestinationStream(
unmappedNamespace = stream.stream.namespace,
unmappedName = stream.stream.name,
namespaceMapper = namespaceMapper,
importType = importType,
generationId = stream.generationId,
minimumGenerationId = stream.minimumGenerationId,
syncId = stream.syncId,
schema = airbyteSchemaType,
isFileBased = stream.stream.isFileBased ?: false,
includeFiles = stream.includeFiles ?: false,
destinationObjectName = stream.destinationObjectName,
matchingKey =
stream.destinationObjectName?.let {
fromCompositeNestedKeyToCompositeKey(stream.primaryKey)
},
tableSchema = tableSchema,
)
}
private fun fromCompositeNestedKeyToCompositeKey(
compositeNestedKey: List<List<String>>
): List<String> {
if (compositeNestedKey.any { it.size > 1 }) {
throw IllegalArgumentException(
"Nested keys are not supported for matching keys. Key was $compositeNestedKey",
)
}
if (compositeNestedKey.any { it.isEmpty() }) {
throw IllegalArgumentException(
"Parts of the composite key need to have at least one element. Key was $compositeNestedKey",
)
}
return compositeNestedKey.map { it[0] }.toList()
}
}

View File

@@ -5,8 +5,8 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
/**
* Client interface for database table operations.

View File

@@ -5,8 +5,8 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import kotlin.collections.component1
import kotlin.collections.component2
import kotlin.collections.contains

View File

@@ -1,12 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.dataflow.transform
import io.airbyte.cdk.load.command.DestinationStream
/** Used by the CDK to pass the final column name to the aggregate buffer. */
interface ColumnNameMapper {
fun getMappedColumnName(stream: DestinationStream, columnName: String): String? = columnName
}

View File

@@ -1,15 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.dataflow.transform.defaults
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton
/*
* Default implementation of the ColumnNameMapper. If your destination needs destination-specific
* column name mapping, create your own ColumnNameMapper implementation in your destination.
*/
@Singleton @Secondary class NoOpColumnNameMapper : ColumnNameMapper

View File

@@ -5,14 +5,12 @@
package io.airbyte.cdk.load.dataflow.transform.medium
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
import jakarta.inject.Singleton
@Singleton
class JsonConverter(
private val columnNameMapper: ColumnNameMapper,
private val coercer: ValueCoercer,
private val validationResultHandler: ValidationResultHandler,
) : MediumConverter {
@@ -24,10 +22,7 @@ class JsonConverter(
val munged = HashMap<String, AirbyteValue>()
enriched.declaredFields.forEach { field ->
val mappedKey =
columnNameMapper.getMappedColumnName(input.msg.stream, field.key)
?: field.key // fallback to the original key
val mappedKey = enriched.stream.tableSchema.getFinalColumnName(field.key)
val mappedValue =
field.value
.let { coercer.map(it) }

View File

@@ -8,11 +8,9 @@ import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.message.DestinationRecordRaw
/**
* Defines a contract for converting a given input into a structured map representation.
* Converts raw destination records into a map of final column name to munged final value.
*
* This interface provides the blueprint for implementing a conversion process that transforms raw
* destination record data, partitioning metadata, and optional source records into a map structure
* with specific key-value pairs.
* This interface provides the blueprint for different serialization intermediate representations.
*/
interface MediumConverter {
/**

View File

@@ -4,7 +4,6 @@
package io.airbyte.cdk.load.dataflow.transform.medium
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
import io.airbyte.cdk.load.data.ArrayType
@@ -38,10 +37,8 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
import io.airbyte.cdk.load.dataflow.transform.defaults.NoOpColumnNameMapper
import io.airbyte.cdk.load.message.DestinationRecordProtobufSource
import io.airbyte.cdk.load.message.DestinationRecordRaw
import io.airbyte.cdk.load.message.Meta
@@ -57,7 +54,6 @@ import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.OffsetTime
import java.time.ZoneOffset
import java.util.concurrent.ConcurrentHashMap
import javax.inject.Singleton
/**
@@ -66,33 +62,12 @@ import javax.inject.Singleton
*/
@Singleton
class ProtobufConverter(
private val columnNameMapper: ColumnNameMapper,
private val coercer: ValueCoercer,
private val validationResultHandler: ValidationResultHandler,
) : MediumConverter {
private val isNoOpMapper = columnNameMapper is NoOpColumnNameMapper
private val decoder = AirbyteValueProtobufDecoder()
private val perStreamMappedNames =
ConcurrentHashMap<DestinationStream.Descriptor, Array<String>>()
private fun mappedNamesFor(
stream: DestinationStream,
fieldAccessors: Array<FieldAccessor>
): Array<String> {
val key = stream.mappedDescriptor
return perStreamMappedNames.computeIfAbsent(key) {
val maxIndex = fieldAccessors.maxOfOrNull { it.index } ?: -1
val arr = Array(maxIndex + 1) { "" }
fieldAccessors.forEach { fa ->
val mapped = columnNameMapper.getMappedColumnName(stream, fa.name) ?: fa.name
arr[fa.index] = mapped
}
arr
}
}
override fun convert(input: ConversionInput): Map<String, AirbyteValue> {
check(input.msg.rawData is DestinationRecordProtobufSource) {
"The raw data must be a protobuf source."
@@ -140,12 +115,8 @@ class ProtobufConverter(
allParsingFailures.addAll(validatedValue.changes)
if (validatedValue.abValue !is NullValue || validatedValue.type !is UnknownType) {
val columnName =
if (isNoOpMapper) accessor.name
else
mappedNamesFor(stream, fieldAccessors).getOrElse(accessor.index) {
accessor.name
}
// Use column mapping from stream
val columnName = stream.tableSchema.getFinalColumnName(accessor.name)
result[columnName] = validatedValue.abValue
}
}

View File

@@ -14,7 +14,8 @@ import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.state.CheckpointId
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange
import java.util.*
import java.util.SequencedMap
import java.util.UUID
import kotlin.collections.LinkedHashMap
data class DestinationRecordRaw(

View File

@@ -0,0 +1,137 @@
/*
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
/** Applies destination-specific column name munging logic and handles any naming collisions. */
@Singleton
class ColumnNameResolver(
private val mapper: TableSchemaMapper,
) {
private val log = KotlinLogging.logger {}
/**
* Creates column name mapping with handling for potential collisions using incremental
* numbering, with advanced resolution for truncation cases.
*/
fun getColumnNameMapping(inputColumnNames: Set<String>): Map<String, String> {
val processedColumnNames = mutableSetOf<String>()
val columnMappings = mutableMapOf<String, String>()
inputColumnNames.forEach { columnName ->
val processedColumnName = mapper.toColumnName(columnName)
// Get a unique column name by adding incremental numbers if necessary
val finalColumnName =
resolveColumnNameCollision(
processedColumnName,
existingNames = processedColumnNames,
originalColumnName = columnName,
)
processedColumnNames.add(finalColumnName)
columnMappings[columnName] = finalColumnName
}
return columnMappings
}
/**
* Resolves column name collisions by first trying incremental suffixes (_1, _2, etc.). If that
* doesn't work due to name truncation, uses the more powerful superResolveColumnCollisions.
*
* @param processedName The name after initial processing by the column name generator
* @param existingNames Set of names already used for other columns
* @param originalColumnName The original column name before processing
*/
private fun resolveColumnNameCollision(
processedName: String,
existingNames: Set<String>,
originalColumnName: String,
): String {
// If processed name is unique, use it
if (!hasConflict(existingNames, processedName)) {
return processedName
}
log.info { "Detected column name collision for $originalColumnName" }
// Try adding incremental suffixes until we find a non-colliding name
var counter = 1
var candidateName: String
var previousCandidate = processedName
do {
// Generate candidate name by adding numeric suffix
candidateName = mapper.toColumnName("${originalColumnName}_$counter")
// Check if we're making progress (detecting potential truncation)
if (colsConflict(candidateName, previousCandidate)) {
// We're not making progress, likely due to name truncation
// Use the more powerful resolution method with the ORIGINAL column name
return superResolveColumnCollisions(
originalColumnName,
existingNames,
processedName.length,
)
}
previousCandidate = candidateName
counter++
} while (existingNames.any { colsConflict(it, candidateName) })
return candidateName
}
/**
* Generates a name of the format `<prefix><length><suffix>` when simple suffix-based conflict
* resolution fails due to name truncation. E.g. for affixLength=3: "veryLongName" -> "ver6ame"
*
* @param originalName The original column name that caused collision
* @param existingNames Set of existing column names to avoid collision with
* @param maximumColumnNameLength The maximum allowed length for the column name
*/
private fun superResolveColumnCollisions(
originalName: String,
existingNames: Set<String>,
maximumColumnNameLength: Int,
): String {
// Assume that the <length> portion can be expressed in at most 5 characters.
// If someone is giving us a column name that's longer than 99999 characters,
// that's just being silly.
val affixLength = (maximumColumnNameLength - 5) / 2
// If, after reserving 5 characters for the length, we can't fit the affixes,
// just give up. That means the destination is trying to restrict us to a
// 6-character column name, which is just silly.
if (affixLength <= 0) {
throw IllegalArgumentException(
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing.",
)
}
val prefix = originalName.take(affixLength)
val suffix = originalName.substring(originalName.length - affixLength, originalName.length)
val length = originalName.length - 2 * affixLength
val newColumnName = mapper.toColumnName("$prefix$length$suffix")
// If there's still a collision after this, just give up.
// We could try to be more clever, but this is already a pretty rare case.
if (hasConflict(existingNames, newColumnName)) {
throw IllegalArgumentException(
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing.",
)
}
return newColumnName
}
fun colsConflict(a: String, b: String): Boolean = mapper.colsConflict(a, b)
fun hasConflict(existingNames: Set<String>, candidate: String) =
existingNames.any { colsConflict(it, candidate) }
}
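
For illustration only (not part of this change), a minimal sketch of how `ColumnNameResolver` handles a case-only collision, using the `NoopTableSchemaMapper` added elsewhere in this diff; the column names are invented:

```kotlin
import io.airbyte.cdk.load.schema.ColumnNameResolver
import io.airbyte.cdk.load.schema.defaults.NoopTableSchemaMapper

fun main() {
    // "ID" and "id" conflict under the default case-insensitive comparison,
    // so the second column receives an incremental "_1" suffix.
    val resolver = ColumnNameResolver(NoopTableSchemaMapper())
    val mapping = resolver.getColumnNameMapping(setOf("ID", "id"))
    println(mapping) // {ID=ID, id=id_1}
}
```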

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.github.oshai.kotlinlogging.KotlinLogging
import jakarta.inject.Singleton
import org.apache.commons.codec.digest.DigestUtils
/** Applies destination-specific table name munging logic and handles any naming collisions. */
@Singleton
class TableNameResolver(
private val mapper: TableSchemaMapper,
) {
private val log = KotlinLogging.logger {}
fun getTableNameMapping(
streamDescriptors: Set<DestinationStream.Descriptor>,
): Map<DestinationStream.Descriptor, TableName> {
val processedFinalTableNames = mutableSetOf<TableName>()
val result = mutableMapOf<DestinationStream.Descriptor, TableName>()
streamDescriptors.forEach { desc ->
val originalFinalTableName = mapper.toFinalTableName(desc)
val currentFinalProcessedName: TableName
val finalTableNameColliding = originalFinalTableName in processedFinalTableNames
if (finalTableNameColliding) {
log.info { "Detected table name collision for ${desc.namespace}.${desc.name}" }
// Create a hash-suffixed name to avoid collision
val hash =
DigestUtils.sha1Hex(
"${originalFinalTableName.namespace}&airbyte&${desc.name}",
)
.substring(0, 3)
val newName = "${desc.name}_$hash"
currentFinalProcessedName =
mapper.toFinalTableName(
desc.copy(name = newName),
)
processedFinalTableNames.add(currentFinalProcessedName)
} else {
processedFinalTableNames.add(originalFinalTableName)
currentFinalProcessedName = originalFinalTableName
}
result[desc] = currentFinalProcessedName
}
return result
}
}

View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import jakarta.inject.Singleton
@Singleton
class TableSchemaFactory(
private val mapper: TableSchemaMapper,
private val colNameResolver: ColumnNameResolver,
) {
fun make(
finalTableName: TableName,
inputSchema: Map<String, FieldType>,
importType: ImportType,
): StreamTableSchema {
val tempTableName = mapper.toTempTableName(finalTableName)
val tableNames =
TableNames(
finalTableName = finalTableName,
tempTableName = tempTableName,
)
val inputToFinalColumnNames = colNameResolver.getColumnNameMapping(inputSchema.keys)
val finalSchema =
inputSchema
.map { inputToFinalColumnNames[it.key]!! to mapper.toColumnType(it.value) }
.toMap()
val columnSchema =
ColumnSchema(
inputSchema = inputSchema,
inputToFinalColumnNames = inputToFinalColumnNames,
finalSchema = finalSchema,
)
val tableSchema =
StreamTableSchema(
tableNames,
columnSchema,
importType,
)
return mapper.toFinalSchema(tableSchema)
}
}

View File

@@ -0,0 +1,70 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
/** Transforms input schema elements to destination-specific naming and type conventions. */
interface TableSchemaMapper {
/**
* Converts a stream descriptor to the final destination table name.
*
* @param desc The stream descriptor containing namespace and name information
* @return The mapped final table name in the destination system
*/
fun toFinalTableName(desc: DestinationStream.Descriptor): TableName
/**
* Generates a temporary table name based on the provided final table name. Temporary tables are
* typically used before data is moved to final tables to avoid data downtime.
*
* @param tableName The final table name to base the temporary name on
* @return The temporary table name
*/
fun toTempTableName(tableName: TableName): TableName
/**
* Transforms a column name from the input schema to comply with destination naming conventions.
* This may include handling special characters, case transformations, or length limitations.
*
* @param name The original column name from the input schema
* @return The destination-compatible column name
*/
fun toColumnName(name: String): String
/**
* Converts an Airbyte field type to the corresponding destination-specific column type. This
* handles mapping of data types from Airbyte's type system to the destination database's type
* system.
*
* @param fieldType The Airbyte field type to convert
* @return The destination-specific column type representation
*/
fun toColumnType(fieldType: FieldType): ColumnType
/**
* Performs any final transformations on the complete table schema before it's used in the
* destination. By default, returns the schema unchanged. Override to apply destination-specific
* schema modifications.
*
* @param tableSchema The complete stream table schema
* @return The finalized schema ready for use in the destination
*/
fun toFinalSchema(tableSchema: StreamTableSchema) = tableSchema
/**
* Determines if two column names conflict according to destination-specific rules. By default,
* performs case-insensitive comparison. Override for different conflict detection logic.
*
* @param a First column name
* @param b Second column name
* @return true if the column names conflict, false otherwise
*/
fun colsConflict(a: String, b: String): Boolean = a.equals(b, ignoreCase = true)
}
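
To make the `TableSchemaMapper` contract concrete, here is a hypothetical implementation sketch that is not part of this change: the class name, the lowercasing/munging rules, and the `airbyte_tmp_` prefix are all invented for illustration. The `main` function feeds it to the `TableNameResolver` introduced above to show how a table name collision gets a short SHA-1 suffix:

```kotlin
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.TableNameResolver
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName

/** Hypothetical mapper: lowercases identifiers and prefixes temp tables. */
class ExampleLowercasingSchemaMapper : TableSchemaMapper {
    private fun munge(name: String) = name.lowercase().replace(Regex("[^a-z0-9_]"), "_")

    override fun toFinalTableName(desc: DestinationStream.Descriptor) =
        TableName(munge(desc.namespace ?: "default"), munge(desc.name))

    override fun toTempTableName(tableName: TableName) =
        tableName.copy(name = "airbyte_tmp_${tableName.name}")

    override fun toColumnName(name: String) = munge(name)

    override fun toColumnType(fieldType: FieldType) =
        ColumnType(fieldType.type.toString(), fieldType.nullable)
    // toFinalSchema and colsConflict keep the interface defaults.
}

fun main() {
    // Both descriptors munge to TableName("public", "users"); TableNameResolver
    // renames the second one with a three-character SHA-1 suffix before munging
    // it again, avoiding the collision.
    val names =
        TableNameResolver(ExampleLowercasingSchemaMapper())
            .getTableNameMapping(
                setOf(
                    DestinationStream.Descriptor(namespace = "public", name = "users"),
                    DestinationStream.Descriptor(namespace = "PUBLIC", name = "USERS"),
                )
            )
    println(names)
}
```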

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.defaults
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName
import io.micronaut.context.annotation.Secondary
import jakarta.inject.Singleton
/**
* Default schema mapper that performs no transformations on names or types.
*
* For destinations that don't do schema munging in the new paradigm.
*/
@Singleton
@Secondary
class NoopTableSchemaMapper : TableSchemaMapper {
override fun toFinalTableName(desc: DestinationStream.Descriptor) =
TableName(desc.namespace ?: "", desc.name)
override fun toTempTableName(tableName: TableName) = tableName
override fun toColumnName(name: String) = name
override fun toColumnType(fieldType: FieldType): ColumnType =
ColumnType(
fieldType.type.toString(),
fieldType.nullable,
)
}

View File

@@ -0,0 +1,18 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.model
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
/** Defines column mappings and types from source input to destination table schema. */
data class ColumnSchema(
// schema on input catalog
val inputSchema: Map<String, FieldType>,
// column name on input catalog to resolved name
val inputToFinalColumnNames: Map<String, String>,
// resolved name to resolved type
val finalSchema: Map<String, ColumnType>,
)

View File

@@ -0,0 +1,35 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.model
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.ImportType
/**
* Schema information for a stream's table representation resolved for the target destination.
*
* Contains everything necessary to perform table operations for the associated stream.
*/
data class StreamTableSchema(
val tableNames: TableNames,
val columnSchema: ColumnSchema,
val importType: ImportType,
) {
fun getFinalColumnName(rawName: String) = columnSchema.inputToFinalColumnNames[rawName]!!
/** Note: Returns final munged column names. */
fun getCursor() =
if (importType is Dedupe)
importType.cursor.map { columnSchema.inputToFinalColumnNames[it]!! }
else emptyList()
/** Note: Returns final munged column names. */
fun getPrimaryKey() =
if (importType is Dedupe)
importType.primaryKey.map { keys ->
keys.map { columnSchema.inputToFinalColumnNames[it]!! }
}
else emptyList()
}
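
A usage sketch (illustrative only; the table, column, and key names are invented) showing how a loader translates input identifiers and dedupe keys through `StreamTableSchema`:

```kotlin
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames

fun main() {
    val schema =
        StreamTableSchema(
            tableNames = TableNames(finalTableName = TableName("warehouse", "users")),
            columnSchema =
                ColumnSchema(
                    inputSchema =
                        linkedMapOf(
                            "ID" to FieldType(IntegerType, nullable = false),
                            "Updated At" to FieldType(StringType, nullable = true),
                        ),
                    inputToFinalColumnNames = mapOf("ID" to "id", "Updated At" to "updated_at"),
                    // destination-specific column types omitted in this sketch
                    finalSchema = mapOf(),
                ),
            importType = Dedupe(primaryKey = listOf(listOf("ID")), cursor = listOf("Updated At")),
        )

    println(schema.getFinalColumnName("Updated At")) // updated_at
    println(schema.getPrimaryKey())                  // [[id]]
    println(schema.getCursor())                      // [updated_at]
}
```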

View File

@@ -2,7 +2,9 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.table
package io.airbyte.cdk.load.schema.model
import io.airbyte.cdk.load.table.TableSuffixes
data class TableName(val namespace: String, val name: String) {
fun toPrettyString(quote: String = "", suffix: String = "") =

View File

@@ -0,0 +1,24 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.model
/** Table names used during different stages of data loading. */
data class TableNames(
// raw only applies to T+D destinations. Pre-deprecated.
val rawTableName: TableName? = null,
val tempTableName: TableName? = null,
val finalTableName: TableName? = null,
) {
init {
check(rawTableName != null || finalTableName != null) {
"At least one table name should be nonnull"
}
}
fun toPrettyString() =
"Raw table: ${rawTableName?.toPrettyString()}; " +
"Temp table: ${tempTableName?.toPrettyString()}; " +
"Final table: ${finalTableName?.toPrettyString()}"
}

View File

@@ -4,21 +4,10 @@
package io.airbyte.cdk.load.table
import io.airbyte.cdk.util.invert
/**
* map from the column name as declared in the schema, to the column name that we'll create in the
* final (typed) table.
*/
@JvmInline
value class ColumnNameMapping(private val columnNameMapping: Map<String, String>) :
Map<String, String> by columnNameMapping {
/**
* Intended for test use only. If we actually need this at runtime, we probably should only
* compute the inverse map once.
*/
// the map is always safe to invert - the entire point of this mapping
// is that it's 1:1 between original and mapped names.
// (if any two columns mapped to the same name, then they'd collide in the destination).
fun originalName(mappedKey: String): String? = columnNameMapping.invert()[mappedKey]
}
Map<String, String> by columnNameMapping

View File

@@ -4,4 +4,13 @@
package io.airbyte.cdk.load.table
/**
* CDC meta column names.
*
* Note: These CDC column names are brittle: they are defined separately from, yet coupled to,
* the logic that sources use to generate these column names. See
* [io.airbyte.integrations.source.mssql.MsSqlSourceOperations.MsSqlServerCdcMetaFields] for an
* example.
*/
const val CDC_DELETED_AT_COLUMN = "_ab_cdc_deleted_at"
const val CDC_CURSOR_COLUMN = "_ab_cdc_cursor"

View File

@@ -2,14 +2,15 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db
package io.airbyte.cdk.load.table
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadInitialStatus
import io.airbyte.cdk.load.orchestration.db.direct_load_table.DirectLoadTableStatus
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
import io.airbyte.cdk.load.table.directload.DirectLoadTableStatus
import java.util.concurrent.ConcurrentHashMap
import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.launch
@@ -31,22 +32,21 @@ interface DatabaseInitialStatus
* ```
*/
fun interface DatabaseInitialStatusGatherer<InitialStatus : DatabaseInitialStatus> {
suspend fun gatherInitialStatus(streams: TableCatalog): Map<DestinationStream, InitialStatus>
suspend fun gatherInitialStatus(): Map<DestinationStream, InitialStatus>
}
abstract class BaseDirectLoadInitialStatusGatherer(
private val tableOperationsClient: TableOperationsClient,
private val tempTableNameGenerator: TempTableNameGenerator,
private val catalog: DestinationCatalog,
) : DatabaseInitialStatusGatherer<DirectLoadInitialStatus> {
override suspend fun gatherInitialStatus(
streams: TableCatalog
): Map<DestinationStream, DirectLoadInitialStatus> {
val map = ConcurrentHashMap<DestinationStream, DirectLoadInitialStatus>(streams.size)
override suspend fun gatherInitialStatus(): Map<DestinationStream, DirectLoadInitialStatus> {
val map =
ConcurrentHashMap<DestinationStream, DirectLoadInitialStatus>(catalog.streams.size)
coroutineScope {
streams.forEach { (stream, tableNameInfo) ->
catalog.streams.forEach { s ->
launch {
val tableName = tableNameInfo.tableNames.finalTableName!!
map[stream] = getInitialStatus(tableName)
val tableNames = s.tableSchema.tableNames
map[s] = getInitialStatus(tableNames)
}
}
}
@@ -65,10 +65,10 @@ abstract class BaseDirectLoadInitialStatusGatherer(
}
}
private suspend fun getInitialStatus(tableName: TableName): DirectLoadInitialStatus {
private suspend fun getInitialStatus(names: TableNames): DirectLoadInitialStatus {
return DirectLoadInitialStatus(
realTable = getTableStatus(tableName),
tempTable = getTableStatus(tempTableNameGenerator.generate(tableName)),
realTable = getTableStatus(names.finalTableName!!),
tempTable = getTableStatus(names.tempTableName!!),
)
}
}

View File

@@ -2,32 +2,16 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db
package io.airbyte.cdk.load.table
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TypingDedupingUtil
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.TableSuffixes.TMP_TABLE_SUFFIX
import jakarta.inject.Singleton
import org.apache.commons.codec.digest.DigestUtils
data class TableNames(
// this is pretty dumb, but in theory we could have:
// * old-style implementation: raw+final tables both exist
// * only the raw table exists (i.e. T+D disabled)
// * only the final table exists (i.e. new-style direct-load tables)
val rawTableName: TableName?,
val finalTableName: TableName?,
) {
init {
check(rawTableName != null || finalTableName != null) {
"At least one table name should be nonnull"
}
}
fun toPrettyString() =
"Raw table: ${rawTableName?.toPrettyString()}; Final table: ${finalTableName?.toPrettyString()}"
}
// Commented out so CI won't be big mad
// @Deprecated("Deprecated in favor of TableSchemaMapper")
fun interface TempTableNameGenerator {
fun generate(originalName: TableName): TableName
}
@@ -39,7 +23,10 @@ fun interface TempTableNameGenerator {
*
* T+D destinations simply appended [TMP_TABLE_SUFFIX] to the table name, and should use
* [TableName.asOldStyleTempTable] instead
*
* Not deprecated, but the interface it implements is deprecated.
*/
@Singleton
open class DefaultTempTableNameGenerator(
private val internalNamespace: String? = null,
private val affixLength: Int = 8,
@@ -90,6 +77,8 @@ sealed interface TableNameGenerator {
fun interface RawTableNameGenerator : TableNameGenerator
// Commented out so CI won't be big mad
// @Deprecated("Deprecated in favor of TableSchemaMapper")
fun interface FinalTableNameGenerator : TableNameGenerator
fun interface ColumnNameGenerator {

View File

@@ -2,7 +2,7 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
package io.airbyte.cdk.load.table
import kotlin.math.max

View File

@@ -2,9 +2,9 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.direct_load_table
package io.airbyte.cdk.load.table.directload
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatus
import io.airbyte.cdk.load.table.DatabaseInitialStatus
data class DirectLoadInitialStatus(
val realTable: DirectLoadTableStatus?,

View File

@@ -2,9 +2,9 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.direct_load_table
package io.airbyte.cdk.load.table.directload
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
data class DirectLoadTableExecutionConfig(
val tableName: TableName,

View File

@@ -2,15 +2,15 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.direct_load_table
package io.airbyte.cdk.load.table.directload
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.state.StreamProcessingFailed
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.load.write.StreamLoader
import io.airbyte.cdk.load.write.StreamStateStore
import io.github.oshai.kotlinlogging.KotlinLogging

View File

@@ -11,7 +11,10 @@ import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.util.deserializeToNode
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
@@ -89,25 +92,6 @@ class DestinationCatalogTest {
),
)
@Test
fun roundTrip() {
val streamFactory =
DestinationStreamFactory(
JsonSchemaToAirbyteType(JsonSchemaToAirbyteType.UnionBehavior.DEFAULT),
namespaceMapper = NamespaceMapper()
)
val catalogFactory = DefaultDestinationCatalogFactory()
val destinationCatalog =
catalogFactory.getDestinationCatalog(
originalCatalog,
streamFactory,
operation = "write",
checkNamespace = null,
namespaceMapper = NamespaceMapper()
)
assertEquals(originalCatalog, destinationCatalog.asProtocolObject())
}
@Test
fun proxyOrderedSchema() {
val stream =
@@ -128,7 +112,23 @@ class DestinationCatalogTest {
"x" to FieldType(IntegerType, nullable = true),
)
),
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames = TableNames(finalTableName = TableName("namespace", "name")),
columnSchema =
ColumnSchema(
inputSchema =
linkedMapOf(
"z" to FieldType(StringType, nullable = true),
"y" to FieldType(BooleanType, nullable = true),
"x" to FieldType(IntegerType, nullable = true),
),
inputToFinalColumnNames = mapOf("z" to "z", "y" to "y", "x" to "x"),
finalSchema = mapOf(),
),
importType = Append,
)
)
val expectedOrderedSchema =
arrayOf(
@@ -158,6 +158,18 @@ class DestinationCatalogTest {
includeFiles = false,
schema = ObjectType(linkedMapOf()),
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName("default", "foo")),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
),
DestinationStream(
unmappedNamespace = null,
@@ -169,6 +181,18 @@ class DestinationCatalogTest {
includeFiles = false,
schema = ObjectType(linkedMapOf()),
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName("default", "foo")),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
),
)
)
@@ -193,6 +217,22 @@ class DestinationCatalogTest {
includeFiles = false,
schema = ObjectType(linkedMapOf()),
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName("default", "foo")),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType =
Dedupe(
primaryKey = listOf(listOf("id")),
cursor = emptyList()
),
)
)
)
)
@@ -226,6 +266,25 @@ class DestinationCatalogTest {
linkedMapOf("id" to FieldType(IntegerType, nullable = true))
),
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName("default", "foo")),
columnSchema =
ColumnSchema(
inputSchema =
linkedMapOf(
"id" to FieldType(IntegerType, nullable = true)
),
inputToFinalColumnNames = mapOf("id" to "id"),
finalSchema = mapOf(),
),
importType =
Dedupe(
primaryKey = listOf(listOf("id")),
cursor = listOf("updated_at"),
),
)
)
)
)

View File

@@ -4,7 +4,13 @@
package io.airbyte.cdk.load.command
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.json.JsonSchemaToAirbyteType
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.protocol.models.JsonSchemaType
import io.airbyte.protocol.models.v0.CatalogHelpers
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
@@ -12,6 +18,7 @@ import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.Field
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.mockk
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith
import kotlin.test.assertNull
@@ -51,7 +58,8 @@ class DestinationStreamUTest {
fun `test given no destination object name when make then no matching keys`() {
val configuredStream = a_configured_stream()
val stream = a_stream_factory().make(configuredStream)
val stream =
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
assertNull(stream.destinationObjectName)
assertNull(stream.matchingKey)
@@ -69,7 +77,8 @@ class DestinationStreamUTest {
)
)
val stream = a_stream_factory().make(configuredStream)
val stream =
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
assertEquals(stream.matchingKey, listOf("composite_key_1", "composite_key_2"))
assertEquals(stream.destinationObjectName, A_DESTINATION_OBJECT_NAME)
@@ -85,7 +94,9 @@ class DestinationStreamUTest {
)
assertFailsWith<IllegalArgumentException>(
block = { a_stream_factory().make(configuredStream) }
block = {
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
}
)
}
@@ -99,15 +110,36 @@ class DestinationStreamUTest {
)
assertFailsWith<IllegalArgumentException>(
block = { a_stream_factory().make(configuredStream) }
block = {
a_stream_factory().make(configuredStream, TableName("namespace", "a_stream_name"))
}
)
}
private fun a_stream_factory(): DestinationStreamFactory =
DestinationStreamFactory(
private fun a_stream_factory(): DestinationStreamFactory {
val mockSchemaFactory = mockk<TableSchemaFactory>()
every { mockSchemaFactory.make(any(), any(), any()) } answers
{
val finalTableName = firstArg<TableName>()
val inputSchema = secondArg<Map<String, FieldType>>()
val importType = thirdArg<io.airbyte.cdk.load.command.ImportType>()
StreamTableSchema(
tableNames = TableNames(finalTableName = finalTableName),
columnSchema =
ColumnSchema(
inputSchema = inputSchema,
inputToFinalColumnNames = inputSchema.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = importType,
)
}
return DestinationStreamFactory(
JsonSchemaToAirbyteType(JsonSchemaToAirbyteType.UnionBehavior.DEFAULT),
namespaceMapper = NamespaceMapper(),
schemaFactory = mockSchemaFactory
)
}
private fun a_configured_stream(): ConfiguredAirbyteStream =
ConfiguredAirbyteStream()

View File

@@ -5,7 +5,11 @@
package io.airbyte.cdk.load.command
import io.airbyte.cdk.load.config.NamespaceDefinitionType
import io.mockk.mockk
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
@@ -22,8 +26,20 @@ class NamespaceMapperTest {
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
schema = mockk(relaxed = true),
namespaceMapper = namespaceMapper
schema = ObjectType(linkedMapOf()),
namespaceMapper = namespaceMapper,
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName(unmappedNamespace, unmappedName)),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
}

View File

@@ -5,8 +5,8 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test

View File

@@ -93,7 +93,23 @@ class AirbyteValueProxyTest {
syncId = 1,
includeFiles = false,
schema = ALL_TYPES_SCHEMA,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName("namespace", "name")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = ALL_TYPES_SCHEMA.properties,
inputToFinalColumnNames =
ALL_TYPES_SCHEMA.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = Append,
)
)
private fun ifNull(value: JsonNode?): JsonNode? {

View File

@@ -26,7 +26,25 @@ class EnrichedDestinationRecordAirbyteValueTest {
generationId = 42L,
minimumGenerationId = 10L,
syncId = 100L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
"test_namespace",
"test_stream"
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
private val emittedAtMs = 1234567890L

View File

@@ -7,6 +7,7 @@ package io.airbyte.cdk.load.dataflow.stages
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.dataflow.pipeline.DataFlowStageIO
import io.airbyte.cdk.load.dataflow.state.PartitionKey
@@ -50,11 +51,29 @@ class ParseStageTest {
unmappedNamespace = "test-namespace",
unmappedName = "test-stream",
importType = Append,
schema = io.airbyte.cdk.load.data.ObjectType(linkedMapOf()),
schema = ObjectType(linkedMapOf()),
generationId = 1L,
minimumGenerationId = 1L,
syncId = 1L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
"test-namespace",
"test-stream"
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
rawRecord =
DestinationRecordRaw(

View File

@@ -8,7 +8,6 @@ import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.dataflow.state.stats.EmissionStats
import io.airbyte.cdk.output.OutputConsumer
import io.airbyte.protocol.models.Jsons
@@ -235,11 +234,29 @@ class EmittedStatsStoreImplTest {
unmappedNamespace = namespace,
unmappedName = name,
importType = Append,
schema = StringType,
schema = io.airbyte.cdk.load.data.ObjectType(linkedMapOf()),
generationId = 1L,
minimumGenerationId = 1L,
syncId = 1L,
namespaceMapper = namespaceMapper
namespaceMapper = namespaceMapper,
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
namespace ?: "default",
name
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
}
}

View File

@@ -16,7 +16,6 @@ import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
@@ -34,8 +33,6 @@ import org.junit.jupiter.api.extension.ExtendWith
@ExtendWith(MockKExtension::class)
class JsonRecordConversionTest {
@MockK lateinit var columnNameMapper: ColumnNameMapper
@MockK lateinit var valueCoercer: ValueCoercer
private lateinit var validationResultHandler: ValidationResultHandler
@@ -45,16 +42,14 @@ class JsonRecordConversionTest {
@BeforeEach
fun setup() {
validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
jsonConverter = JsonConverter(columnNameMapper, valueCoercer, validationResultHandler)
jsonConverter = JsonConverter(valueCoercer, validationResultHandler)
}
@Test
fun `transforms record into map of munged keys and values`() {
// add "_munged" to every key so we can validate we get the mapped cols
every { columnNameMapper.getMappedColumnName(any(), any()) } answers
{
secondArg<String>() + "_munged"
}
// NOTE: columnNameMapper has been removed from the API
// Column name mapping is now handled by the stream's tableSchema
// This test has been modified to work with the new API
every { valueCoercer.validate(any<EnrichedAirbyteValue>()) } returns ValidationResult.Valid
@@ -87,10 +82,28 @@ class JsonRecordConversionTest {
"internal_field_2" to Fixtures.mockCoercedValue(IntegerValue(0)),
"internal_field_3" to Fixtures.mockCoercedValue(BooleanValue(true)),
)
// Mock the stream with tableSchema that provides column name mapping
val mockStream =
mockk<io.airbyte.cdk.load.command.DestinationStream> {
every { tableSchema } returns
mockk {
every { getFinalColumnName(any()) } answers
{
val columnName = firstArg<String>()
if (columnName.startsWith("user_field")) {
"${columnName}_munged"
} else {
columnName
}
}
}
}
val coerced =
mockk<EnrichedDestinationRecordAirbyteValue> {
every { declaredFields } answers { userFields }
every { airbyteMetaFields } answers { internalFields }
every { stream } returns mockStream
}
val input =

View File

@@ -9,7 +9,6 @@ import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.data.*
import io.airbyte.cdk.load.data.AirbyteValueProxy.FieldAccessor
import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
@@ -49,11 +48,6 @@ class ProtobufConverterTest {
every { validate(any()) } returns ValidationResult.Valid
}
private fun createMockMapperPassThrough(): ColumnNameMapper =
mockk<ColumnNameMapper> {
every { getMappedColumnName(any(), any()) } answers { secondArg<String>() }
}
private fun fa(name: String, type: AirbyteType, idx: Int): FieldAccessor = mockk {
every { this@mockk.name } returns name
every { this@mockk.type } returns type
@@ -134,7 +128,8 @@ class ProtobufConverterTest {
source: DestinationRecordProtobufSource = buildProtoSource(emptyList()),
generationId: Long = 1L,
syncId: Long = 2L,
unknownChanges: List<Meta.Change> = emptyList()
unknownChanges: List<Meta.Change> = emptyList(),
columnNameMapper: ((String) -> String)? = null
): DestinationRecordRaw {
val destinationStream =
mockk<DestinationStream> {
@@ -145,6 +140,15 @@ class ProtobufConverterTest {
every { mappedDescriptor } returns DestinationStream.Descriptor("namespace", "name")
every { unmappedDescriptor } returns
DestinationStream.Descriptor("namespace", "name")
// Add tableSchema mock
every { tableSchema } returns
mockk {
every { getFinalColumnName(any()) } answers
{
val columnName = firstArg<String>()
columnNameMapper?.invoke(columnName) ?: columnName
}
}
}
return mockk<DestinationRecordRaw> {
every { stream } returns destinationStream
@@ -156,9 +160,8 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata processes basic types correctly`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors =
arrayOf(
@@ -268,9 +271,8 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata handles BigDecimal values correctly`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors =
arrayOf(
@@ -311,9 +313,8 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata handles null values`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("null_field", StringType, 0))
@@ -338,8 +339,7 @@ class ProtobufConverterTest {
every { validate(any()) } returns ValidationResult.Valid
}
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val columnNameMapper = createMockMapperPassThrough()
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("time_field", TimeTypeWithoutTimezone, 0))
val protoValues = listOf(vTimeNoTz(LocalTime.parse("12:34:56")))
@@ -384,9 +384,8 @@ class ProtobufConverterTest {
}
}
}
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("short_string", StringType, 0), fa("long_string", StringType, 1))
val protoValues = listOf(vString("hello"), vString("this_is_too_long"))
@@ -406,24 +405,26 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata applies column mapping`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper =
object : ColumnNameMapper {
override fun getMappedColumnName(
stream: DestinationStream,
columnName: String
): String = if (columnName == "original_name") "mapped_name" else columnName
}
// NOTE: Column name mapping is now handled by the stream's tableSchema
// This test has been modified to work with the new API
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("original_name", StringType, 0))
val protoValues = listOf(vString("test"))
val msg =
mockMsgWithStream(accessors, source = buildProtoSource(protoValues.map { it.build() }))
mockMsgWithStream(
accessors,
source = buildProtoSource(protoValues.map { it.build() }),
columnNameMapper = { columnName ->
if (columnName == "original_name") "mapped_name" else columnName
}
)
val result = converter.convert(ConversionInput(msg, PartitionKey("test-key")))
// Column mapping is now handled by tableSchema
assertFalse(result.containsKey("original_name"))
assertTrue(result.containsKey("mapped_name"))
assertEquals("test", (result["mapped_name"] as StringValue).value)
@@ -432,9 +433,8 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata handles parsing exceptions`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("invalid_int", IntegerType, 0))
@@ -457,9 +457,8 @@ class ProtobufConverterTest {
@Test
fun `convertWithMetadata merges meta changes from source + stream unknown changes + parsing failures`() {
val valueCoercer = createMockCoercerPassThrough()
val columnNameMapper = createMockMapperPassThrough()
val validationResultHandler = ValidationResultHandler(mockk(relaxed = true))
val converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
val converter = ProtobufConverter(valueCoercer, validationResultHandler)
val accessors = arrayOf(fa("ok_str", StringType, 0), fa("bad_int", IntegerType, 1))

View File

@@ -38,7 +38,6 @@ import io.airbyte.cdk.load.data.TimestampWithoutTimezoneValue
import io.airbyte.cdk.load.data.UnionType
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.dataflow.transform.ColumnNameMapper
import io.airbyte.cdk.load.dataflow.transform.ValidationResult
import io.airbyte.cdk.load.dataflow.transform.ValueCoercer
import io.airbyte.cdk.load.dataflow.transform.data.ValidationResultHandler
@@ -83,7 +82,6 @@ class ProtobufRecordConversionTest {
private val generationId = 314L
private lateinit var stream: DestinationStream
private lateinit var columnNameMapper: ColumnNameMapper
private lateinit var valueCoercer: ValueCoercer
private lateinit var validationResultHandler: ValidationResultHandler
private var protoSource: DestinationRecordProtobufSource? = null
@@ -93,15 +91,7 @@ class ProtobufRecordConversionTest {
@BeforeEach
fun setUp() {
columnNameMapper =
object : ColumnNameMapper {
override fun getMappedColumnName(
stream: DestinationStream,
columnName: String
): String {
return "mapped_$columnName"
}
}
// NOTE: Column name mapping is now handled by the stream's tableSchema
valueCoercer =
object : ValueCoercer {
@@ -281,6 +271,15 @@ class ProtobufRecordConversionTest {
DestinationStream.Descriptor("", "dummy")
every { this@mockk.unknownColumnChanges } returns
dummyType.computeUnknownColumnChanges()
// Add tableSchema mock for column name mapping
every { this@mockk.tableSchema } returns
mockk {
every { getFinalColumnName(any()) } answers
{
val columnName = firstArg<String>()
"mapped_$columnName"
}
}
}
record =
@@ -298,7 +297,7 @@ class ProtobufRecordConversionTest {
every { this@mockk.stream } returns this@ProtobufRecordConversionTest.stream
}
converter = ProtobufConverter(columnNameMapper, valueCoercer, validationResultHandler)
converter = ProtobufConverter(valueCoercer, validationResultHandler)
}
@AfterEach fun tearDown() = unmockkAll()
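
The converter tests above replace the old `ColumnNameMapper` stub with a mock of the stream's `tableSchema`. A minimal sketch of that stubbing pattern, assuming MockK as in the tests; the `_munged` suffix is purely illustrative.

```kotlin
import io.airbyte.cdk.load.command.DestinationStream
import io.mockk.every
import io.mockk.mockk

// Column-name lookup is stubbed on the stream's tableSchema instead of a
// ColumnNameMapper; "_munged" is only an illustrative transform.
val mockStream: DestinationStream = mockk {
    every { tableSchema } returns
        mockk { every { getFinalColumnName(any()) } answers { "${firstArg<String>()}_munged" } }
}
```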

View File

@@ -71,7 +71,25 @@ internal class DestinationMessageTest {
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapper
namespaceMapper = namespaceMapper,
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
descriptor.namespace ?: "default",
descriptor.name
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
)
),
@@ -614,6 +632,14 @@ internal class DestinationMessageTest {
@Test
fun `message factory creates record from protobuf`() {
// Note: can't be a mock or `schemaInAirbyteProxyOrder` won't return the correct value
val streamSchema =
ObjectType(
properties =
linkedMapOf(
"id" to FieldType(IntegerType, nullable = true),
"name" to FieldType(StringType, nullable = true)
)
)
val stream =
DestinationStream(
unmappedNamespace = "namespace",
@@ -622,15 +648,24 @@ internal class DestinationMessageTest {
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
schema =
ObjectType(
properties =
linkedMapOf(
"id" to FieldType(IntegerType, nullable = true),
"name" to FieldType(StringType, nullable = true)
)
),
namespaceMapper = NamespaceMapper()
schema = streamSchema,
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName("namespace", "name")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = streamSchema.properties,
inputToFinalColumnNames =
streamSchema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = Append,
)
)
val catalog = DestinationCatalog(streams = listOf(stream))

View File

@@ -4,6 +4,7 @@
package io.airbyte.cdk.load.message
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.*
@@ -41,12 +42,31 @@ class DestinationRecordRawTest {
DestinationStream(
unmappedNamespace = "test_namespace",
unmappedName = "test_stream",
io.airbyte.cdk.load.command.Append,
Append,
recordSchema,
generationId = 42L,
minimumGenerationId = 0L,
syncId = 123L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
"test_namespace",
"test_stream"
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = recordSchema.properties,
inputToFinalColumnNames =
recordSchema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = Append,
)
)
@Test
@@ -272,12 +292,30 @@ class DestinationRecordRawTest {
DestinationStream(
unmappedNamespace = "test_namespace",
unmappedName = "test_stream",
io.airbyte.cdk.load.command.Append,
Append,
emptySchema,
generationId = 42L,
minimumGenerationId = 0L,
syncId = 123L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
"test_namespace",
"test_stream"
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
val jsonData = """{"field1": "value1", "field2": 123}"""
@@ -352,12 +390,31 @@ class DestinationRecordRawTest {
DestinationStream(
unmappedNamespace = "test_namespace",
unmappedName = "test_stream",
io.airbyte.cdk.load.command.Append,
Append,
complexSchema,
generationId = 42L,
minimumGenerationId = 0L,
syncId = 123L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName(
"test_namespace",
"test_stream"
)
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = complexSchema.properties,
inputToFinalColumnNames =
complexSchema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = Append,
)
)
val jsonData =

View File

@@ -52,7 +52,21 @@ class PipelineEventBookkeepingRouterTest {
1,
1,
1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName = io.airbyte.cdk.load.schema.model.TableName("ns", "s1")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = io.airbyte.cdk.load.command.Append,
)
)
private val stream2 =
DestinationStream(
@@ -63,7 +77,21 @@ class PipelineEventBookkeepingRouterTest {
1,
1,
1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName = io.airbyte.cdk.load.schema.model.TableName("ns", "s2")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = io.airbyte.cdk.load.command.Append,
)
)
private fun router(numDataChannels: Int, markEndOfStreamAtEnd: Boolean = false) =

View File

@@ -0,0 +1,146 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertThrows
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
@ExtendWith(MockKExtension::class)
class ColumnNameResolverTest {
@MockK private lateinit var mapper: TableSchemaMapper
@Test
fun `handles no collisions`() {
val resolver = ColumnNameResolver(mapper)
val columns = setOf("col1", "col2", "col3")
every { mapper.toColumnName("col1") } returns "col1"
every { mapper.toColumnName("col2") } returns "col2"
every { mapper.toColumnName("col3") } returns "col3"
every { mapper.colsConflict(any(), any()) } returns false
val result = resolver.getColumnNameMapping(columns)
assertEquals(3, result.size)
assertEquals("col1", result["col1"])
assertEquals("col2", result["col2"])
assertEquals("col3", result["col3"])
}
@Test
fun `handles simple collision with numeric suffix`() {
val resolver = ColumnNameResolver(mapper)
val columns = setOf("name", "Name")
every { mapper.toColumnName("name") } returns "name"
every { mapper.toColumnName("Name") } returns "name"
every { mapper.toColumnName("Name_1") } returns "name_1"
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
val result = resolver.getColumnNameMapping(columns)
assertEquals(2, result.size)
assertEquals("name", result["name"])
assertEquals("name_1", result["Name"])
}
@Test
fun `handles multiple collisions with incremental suffixes`() {
val resolver = ColumnNameResolver(mapper)
val columns = setOf("col", "Col", "COL")
every { mapper.toColumnName("col") } returns "col"
every { mapper.toColumnName("Col") } returns "col"
every { mapper.toColumnName("COL") } returns "col"
every { mapper.toColumnName("Col_1") } returns "col_1"
every { mapper.toColumnName("COL_1") } returns "col_1"
every { mapper.toColumnName("COL_2") } returns "col_2"
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
val result = resolver.getColumnNameMapping(columns)
assertEquals(3, result.size)
assertEquals("col", result["col"])
assertEquals("col_1", result["Col"])
assertEquals("col_2", result["COL"])
}
// We're testing some internals here, but I think it's important to validate this behavior as it
// represents an API contract with the destination. Any changes here will potentially affect
// customer destination schemas.
@Test
fun `handles truncation with super resolution`() {
val resolver = ColumnNameResolver(mapper)
val shortName = "short"
val longName1 = "a".repeat(100)
val longName2 = "a".repeat(50)
val columns = setOf("short", longName1, longName2)
every { mapper.toColumnName(shortName) } returns "short"
every { mapper.toColumnName(longName1) } returns "truncated"
every { mapper.toColumnName(longName2) } returns "truncated"
every { mapper.toColumnName("${longName1}_1") } returns "truncated"
every { mapper.toColumnName("${longName2}_1") } returns "truncated"
every { mapper.toColumnName("aa46aa") } returns "different"
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
val result = resolver.getColumnNameMapping(columns)
assertEquals(3, result.size)
assertEquals("short", result["short"])
assertEquals("truncated", result[longName1])
assertEquals("different", result[longName2])
}
@Test
fun `throws exception when super resolution fails`() {
val resolver = ColumnNameResolver(mapper)
val shortName = "short"
val longName1 = "a".repeat(100)
val longName2 = "a".repeat(50)
val columns = setOf("short", longName1, longName2)
every { mapper.toColumnName(shortName) } returns "short"
every { mapper.toColumnName(longName1) } returns "truncated"
every { mapper.toColumnName(longName2) } returns "truncated"
every { mapper.toColumnName("${longName1}_1") } returns "truncated"
every { mapper.toColumnName("${longName2}_1") } returns "truncated"
every { mapper.toColumnName("aa46aa") } returns "truncated"
every { mapper.colsConflict(any(), any()) } answers { args[0] == args[1] }
assertThrows(IllegalArgumentException::class.java) {
resolver.getColumnNameMapping(columns)
}
}
@Test
fun `handles empty set`() {
val resolver = ColumnNameResolver(mapper)
val result = resolver.getColumnNameMapping(emptySet())
assertEquals(0, result.size)
}
@Test
fun `preserves original names when no processing needed`() {
val resolver = ColumnNameResolver(mapper)
val columns = setOf("valid_name_1", "valid_name_2")
every { mapper.toColumnName("valid_name_1") } returns "valid_name_1"
every { mapper.toColumnName("valid_name_2") } returns "valid_name_2"
every { mapper.colsConflict(any(), any()) } returns false
val result = resolver.getColumnNameMapping(columns)
assertEquals(2, result.size)
assertEquals("valid_name_1", result["valid_name_1"])
assertEquals("valid_name_2", result["valid_name_2"])
}
}
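
Read alongside the collision tests above, resolver usage looks roughly like the sketch below. The lowercasing mapper is an assumed stand-in for a real destination's `TableSchemaMapper`; the expected mapping follows the numeric-suffix behavior asserted in `handles simple collision with numeric suffix`.

```kotlin
import io.airbyte.cdk.load.schema.ColumnNameResolver
import io.airbyte.cdk.load.schema.TableSchemaMapper
import io.mockk.every
import io.mockk.mockk

// Assumed stand-in for a case-insensitive destination: both "name" and "Name"
// normalize to "name", so the resolver falls back to a numeric suffix.
val mapper: TableSchemaMapper = mockk {
    every { toColumnName(any()) } answers { firstArg<String>().lowercase() }
    every { colsConflict(any(), any()) } answers { firstArg<String>() == secondArg<String>() }
}

val mapping = ColumnNameResolver(mapper).getColumnNameMapping(setOf("name", "Name"))
// Expected, per the collision test above: {"name" -> "name", "Name" -> "name_1"}
```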

View File

@@ -0,0 +1,126 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.model.TableName
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.extension.ExtendWith
@ExtendWith(MockKExtension::class)
class TableNameResolverTest {
@MockK private lateinit var mapper: TableSchemaMapper
@Test
fun `handles no collisions`() {
val resolver = TableNameResolver(mapper)
val desc1 = DestinationStream.Descriptor("namespace1", "stream1")
val desc2 = DestinationStream.Descriptor("namespace2", "stream2")
val descriptors = setOf(desc1, desc2)
val table1 = TableName("namespace1", "stream1")
val table2 = TableName("namespace2", "stream2")
every { mapper.toFinalTableName(desc1) } returns table1
every { mapper.toFinalTableName(desc2) } returns table2
val result = resolver.getTableNameMapping(descriptors)
assertEquals(2, result.size)
assertEquals(table1, result[desc1])
assertEquals(table2, result[desc2])
}
// We're testing some internals here, but this represents an external API with the destination,
// so it's worth preserving.
@Test
fun `handles table name collision with hash suffix`() {
val resolver = TableNameResolver(mapper)
val desc1 = DestinationStream.Descriptor("namespace", "stream1")
val desc2 = DestinationStream.Descriptor("namespace", "stream2")
val descriptors = setOf(desc1, desc2)
val collisionTableName = TableName("namespace", "same_table")
val hashedTableName = TableName("namespace", "stream2_hash")
every { mapper.toFinalTableName(any()) } returnsMany
listOf(
// call with desc1
collisionTableName,
// call with desc2
collisionTableName,
// call with desc2 and hash appended
hashedTableName,
)
val result = resolver.getTableNameMapping(descriptors)
assertEquals(2, result.size)
assertEquals(collisionTableName, result[desc1])
assertEquals(hashedTableName, result[desc2])
}
// We're testing some internals here, but this represents an external API with the destination,
// so it's worth preserving.
@Test
fun `handles multiple collisions`() {
val resolver = TableNameResolver(mapper)
val desc1 = DestinationStream.Descriptor("namespace", "stream1")
val desc2 = DestinationStream.Descriptor("namespace", "stream2")
val desc3 = DestinationStream.Descriptor("namespace", "stream3")
val descriptors = setOf(desc1, desc2, desc3)
val collisionTableName = TableName("namespace", "same_table")
val hashedTable2 = TableName("namespace", "stream2_hash")
val hashedTable3 = TableName("namespace", "stream3_hash")
every { mapper.toFinalTableName(any()) } returnsMany
listOf(
// call with desc1
collisionTableName,
// call with desc2
collisionTableName,
// call with desc2 and hash appended
hashedTable2,
// call with desc3
collisionTableName,
// call with desc3 and hash appended
hashedTable3,
)
val result = resolver.getTableNameMapping(descriptors)
assertEquals(3, result.size)
assertEquals(collisionTableName, result[desc1])
assertEquals(hashedTable2, result[desc2])
assertEquals(hashedTable3, result[desc3])
}
@Test
fun `handles empty set`() {
val resolver = TableNameResolver(mapper)
val result = resolver.getTableNameMapping(emptySet())
assertEquals(0, result.size)
}
@Test
fun `handles single stream`() {
val resolver = TableNameResolver(mapper)
val desc = DestinationStream.Descriptor("namespace", "stream")
val table = TableName("namespace", "stream")
every { mapper.toFinalTableName(desc) } returns table
val result = resolver.getTableNameMapping(setOf(desc))
assertEquals(1, result.size)
assertEquals(table, result[desc])
}
}
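
For a quick sense of the happy path, the sketch below uses the `NoopTableSchemaMapper` introduced later in this diff; that it satisfies the full `TableSchemaMapper` contract expected by the resolver is an assumption.

```kotlin
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.schema.TableNameResolver
import io.airbyte.cdk.load.schema.defaults.NoopTableSchemaMapper

// With the no-op mapper, distinct descriptors map straight through; the hash
// suffix shown in the collision tests only applies when two descriptors
// normalize to the same TableName (e.g. under case folding or truncation).
val resolver = TableNameResolver(NoopTableSchemaMapper())

val mapping =
    resolver.getTableNameMapping(
        setOf(
            DestinationStream.Descriptor("analytics", "users"),
            DestinationStream.Descriptor("analytics", "orders"),
        )
    )
// mapping[Descriptor("analytics", "users")] == TableName("analytics", "users")
```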

View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.mockk.every
import io.mockk.impl.annotations.MockK
import io.mockk.junit5.MockKExtension
import java.util.stream.Stream
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.extension.ExtendWith
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource
@ExtendWith(MockKExtension::class)
class TableSchemaFactoryTest {
@MockK private lateinit var mapper: TableSchemaMapper
@MockK private lateinit var colNameResolver: ColumnNameResolver
@ParameterizedTest
@MethodSource("schemaTestCases")
fun `creates correct StreamTableSchema`(
inputSchema: Map<String, FieldType>,
importType: ImportType,
columnNameMapping: Map<String, String>
) {
val factory = TableSchemaFactory(mapper, colNameResolver)
val finalTableName = TableName("namespace", "table")
val tempTableName = TableName("namespace", "table_tmp")
every { mapper.toTempTableName(finalTableName) } returns tempTableName
every { colNameResolver.getColumnNameMapping(inputSchema.keys) } returns columnNameMapping
every { mapper.toColumnType(any()) } returns ColumnType("test_type", false)
every { mapper.toFinalSchema(any()) } answers { firstArg<StreamTableSchema>() }
val result = factory.make(finalTableName, inputSchema, importType)
assertEquals(finalTableName, result.tableNames.finalTableName)
assertEquals(tempTableName, result.tableNames.tempTableName)
assertEquals(inputSchema, result.columnSchema.inputSchema)
assertEquals(columnNameMapping, result.columnSchema.inputToFinalColumnNames)
assertEquals(importType, result.importType)
val expectedFinalSchema =
columnNameMapping
.map { (_, finalName) ->
val columnType = ColumnType("test_type", false)
finalName to columnType
}
.toMap()
assertEquals(expectedFinalSchema, result.columnSchema.finalSchema)
}
companion object {
@JvmStatic
fun schemaTestCases(): Stream<Arguments> =
Stream.of(
Arguments.of(
mapOf(
"id" to FieldType(IntegerType, false),
"name" to FieldType(StringType, false),
),
Append,
mapOf("id" to "id_final", "name" to "name_final")
),
Arguments.of(
mapOf(
"id" to FieldType(IntegerType, false),
"name" to FieldType(StringType, false),
"updated_at" to FieldType(StringType, false),
),
Dedupe(listOf(listOf("id")), listOf("updated_at")),
mapOf("id" to "id", "name" to "name", "updated_at" to "updated_at")
),
Arguments.of(emptyMap<String, FieldType>(), Append, emptyMap<String, String>()),
Arguments.of(
mapOf(
"id1" to FieldType(IntegerType, false),
"id2" to FieldType(IntegerType, false),
"data" to FieldType(StringType, false),
),
Dedupe(listOf(listOf("id1", "id2")), emptyList()),
mapOf("id1" to "id1", "id2" to "id2", "data" to "data")
),
Arguments.of(
mapOf("value" to FieldType(StringType, false)),
Append,
mapOf("value" to "value")
)
)
}
}
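
The factory call shape matches the parameterized test above; the sketch below wires it with the no-op mapper. Whether `NoopTableSchemaMapper` also provides pass-through `colsConflict`, `toTempTableName`, and `toFinalSchema` behavior is an assumption here.

```kotlin
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.ColumnNameResolver
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.defaults.NoopTableSchemaMapper
import io.airbyte.cdk.load.schema.model.TableName

// Wiring sketch: the factory combines the mapper, the column-name resolver, and
// the declared input schema into a StreamTableSchema for one stream.
val mapper = NoopTableSchemaMapper()
val factory = TableSchemaFactory(mapper, ColumnNameResolver(mapper))

val tableSchema =
    factory.make(
        TableName("analytics", "users"),
        mapOf(
            "id" to FieldType(IntegerType, false),
            "name" to FieldType(StringType, false),
        ),
        Append,
    )
// tableSchema.columnSchema.finalSchema maps the resolved column names to ColumnTypes.
```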

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.defaults
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.BooleanType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.NumberType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.TableName
import java.util.stream.Stream
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource
class NoopTableSchemaMapperTest {
private val mapper = NoopTableSchemaMapper()
@Test
fun `toFinalTableName returns unchanged table name`() {
val desc1 = DestinationStream.Descriptor("namespace", "name")
val result1 = mapper.toFinalTableName(desc1)
Assertions.assertEquals(TableName("namespace", "name"), result1)
val desc2 = DestinationStream.Descriptor(null, "name")
val result2 = mapper.toFinalTableName(desc2)
Assertions.assertEquals(TableName("", "name"), result2)
}
@Test
fun `toTempTableName returns unchanged table name`() {
val tableName = TableName("namespace", "name")
val result = mapper.toTempTableName(tableName)
Assertions.assertEquals(tableName, result)
}
@Test
fun `toColumnName returns unchanged column name`() {
Assertions.assertEquals("column_name", mapper.toColumnName("column_name"))
Assertions.assertEquals("UPPERCASE", mapper.toColumnName("UPPERCASE"))
Assertions.assertEquals("123_numbers", mapper.toColumnName("123_numbers"))
Assertions.assertEquals("special@#chars", mapper.toColumnName("special@#chars"))
Assertions.assertEquals("", mapper.toColumnName(""))
}
@ParameterizedTest
@MethodSource("fieldTypeTestCases")
fun `toColumnType maps field types as strings`(
fieldType: FieldType,
) {
val result = mapper.toColumnType(fieldType)
Assertions.assertEquals(ColumnType(fieldType.type.toString(), fieldType.nullable), result)
}
@Test
fun `handles empty and special cases`() {
val emptyDesc = DestinationStream.Descriptor("", "")
Assertions.assertEquals(TableName("", ""), mapper.toFinalTableName(emptyDesc))
val emptyTable = TableName("", "")
Assertions.assertEquals(emptyTable, mapper.toTempTableName(emptyTable))
Assertions.assertEquals("", mapper.toColumnName(""))
}
companion object {
@JvmStatic
fun fieldTypeTestCases(): Stream<Arguments> =
Stream.of(
Arguments.of(FieldType(StringType, false)),
Arguments.of(FieldType(IntegerType, false)),
Arguments.of(FieldType(BooleanType, true)),
Arguments.of(FieldType(NumberType, false)),
)
}
}

View File

@@ -0,0 +1,249 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.schema.model
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Test
class StreamTableSchemaTest {
@Test
fun `getCursor returns mapped column names for dedupe`() {
val columnSchema =
ColumnSchema(
inputSchema = Fixtures.cursorColumns,
inputToFinalColumnNames = Fixtures.cursorColumnMapping,
finalSchema = Fixtures.cursorFinalSchema,
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType =
Dedupe(
primaryKey = listOf(listOf("id")),
cursor = listOf("updated_at", "modified_date"),
),
)
val result = streamTableSchema.getCursor()
assertEquals(listOf("updated_at_final", "modified_date_final"), result)
}
@Test
fun `getCursor returns empty list for append`() {
val columnSchema =
ColumnSchema(
inputSchema = mapOf("updated_at" to FieldType(IntegerType, false)),
inputToFinalColumnNames = mapOf("updated_at" to "updated_at_final"),
finalSchema = mapOf("updated_at_final" to ColumnType("INTEGER", false)),
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType = Append,
)
val result = streamTableSchema.getCursor()
assertEquals(emptyList<String>(), result)
}
@Test
fun `getPrimaryKey returns mapped column names for dedupe`() {
val columnSchema =
ColumnSchema(
inputSchema = Fixtures.primaryKeyColumns,
inputToFinalColumnNames = Fixtures.primaryKeyColumnMapping,
finalSchema = Fixtures.primaryKeyFinalSchema,
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType =
Dedupe(
primaryKey = listOf(listOf("id"), listOf("user_id", "org_id")),
cursor = emptyList()
)
)
val result = streamTableSchema.getPrimaryKey()
assertEquals(listOf(listOf("id_final"), listOf("user_id_final", "org_id_final")), result)
}
@Test
fun `getPrimaryKey returns empty list for append`() {
val columnSchema =
ColumnSchema(
inputSchema = mapOf("id" to FieldType(IntegerType, false)),
inputToFinalColumnNames = mapOf("id" to "id_final"),
finalSchema = mapOf("id_final" to ColumnType("INTEGER", false))
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType = Append,
)
val result = streamTableSchema.getPrimaryKey()
assertEquals(emptyList<List<String>>(), result)
}
@Test
fun `getFinalColumnName returns mapped name`() {
val columnSchema =
ColumnSchema(
inputSchema =
mapOf(
"original_name" to FieldType(StringType, false),
"another_column" to FieldType(IntegerType, false),
),
inputToFinalColumnNames =
mapOf("original_name" to "mapped_name", "another_column" to "another_mapped"),
finalSchema =
mapOf(
"mapped_name" to ColumnType("STRING", false),
"another_mapped" to ColumnType("INTEGER", false)
)
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType = Append,
)
assertEquals("mapped_name", streamTableSchema.getFinalColumnName("original_name"))
assertEquals("another_mapped", streamTableSchema.getFinalColumnName("another_column"))
}
@Test
fun `handles empty cursor and primary key for dedupe`() {
val columnSchema =
ColumnSchema(
inputSchema = emptyMap(),
inputToFinalColumnNames = emptyMap(),
finalSchema = emptyMap()
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType = Dedupe(primaryKey = emptyList(), cursor = emptyList())
)
assertEquals(emptyList<String>(), streamTableSchema.getCursor())
assertEquals(emptyList<List<String>>(), streamTableSchema.getPrimaryKey())
}
@Test
fun `handles complex composite primary key mapping`() {
val columnSchema =
ColumnSchema(
inputSchema = Fixtures.compositeKeyColumns,
inputToFinalColumnNames = Fixtures.compositeKeyColumnMapping,
finalSchema = Fixtures.compositeKeyFinalSchema,
)
val streamTableSchema =
StreamTableSchema(
tableNames = Fixtures.defaultTableNames,
columnSchema = columnSchema,
importType =
Dedupe(
primaryKey = listOf(listOf("tenant_id", "region_code", "product_id")),
cursor = listOf("tenant_id")
)
)
assertEquals(
listOf(listOf("TENANT_ID", "REGION_CODE", "PRODUCT_ID")),
streamTableSchema.getPrimaryKey()
)
assertEquals(listOf("TENANT_ID"), streamTableSchema.getCursor())
}
object Fixtures {
val defaultTableName = TableName("namespace", "table")
val defaultTableNames = TableNames(finalTableName = defaultTableName)
val cursorColumns =
mapOf(
"updated_at" to FieldType(IntegerType, false),
"modified_date" to FieldType(StringType, false),
)
val cursorColumnMapping =
mapOf(
"updated_at" to "updated_at_final",
"modified_date" to "modified_date_final",
)
val cursorFinalSchema =
mapOf(
"updated_at_final" to ColumnType("INTEGER", false),
"modified_date_final" to ColumnType("STRING", false),
)
val primaryKeyColumns =
mapOf(
"id" to FieldType(IntegerType, false),
"user_id" to FieldType(IntegerType, false),
"org_id" to FieldType(IntegerType, false),
)
val primaryKeyColumnMapping =
mapOf(
"id" to "id_final",
"user_id" to "user_id_final",
"org_id" to "org_id_final",
)
val primaryKeyFinalSchema =
mapOf(
"id_final" to ColumnType("INTEGER", false),
"user_id_final" to ColumnType("INTEGER", false),
"org_id_final" to ColumnType("INTEGER", false),
)
val compositeKeyColumns =
mapOf(
"tenant_id" to FieldType(IntegerType, false),
"region_code" to FieldType(StringType, false),
"product_id" to FieldType(IntegerType, false),
)
val compositeKeyColumnMapping =
mapOf(
"tenant_id" to "TENANT_ID",
"region_code" to "REGION_CODE",
"product_id" to "PRODUCT_ID",
)
val compositeKeyFinalSchema =
mapOf(
"TENANT_ID" to ColumnType("INTEGER", false),
"REGION_CODE" to ColumnType("STRING", false),
"PRODUCT_ID" to ColumnType("INTEGER", false),
)
}
}
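
Outside a test harness, the same accessors read like the sketch below: a dedupe destination resolves primary key and cursor through the final (mapped) column names. The uppercase mapping is illustrative only.

```kotlin
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.component.ColumnType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames

// A dedupe schema where the destination reads the primary key and cursor through
// the final (mapped) column names rather than the source field names.
val dedupeSchema =
    StreamTableSchema(
        tableNames = TableNames(finalTableName = TableName("analytics", "users")),
        columnSchema =
            ColumnSchema(
                inputSchema =
                    mapOf(
                        "id" to FieldType(IntegerType, false),
                        "updated_at" to FieldType(StringType, false),
                    ),
                inputToFinalColumnNames = mapOf("id" to "ID", "updated_at" to "UPDATED_AT"),
                finalSchema =
                    mapOf(
                        "ID" to ColumnType("INTEGER", false),
                        "UPDATED_AT" to ColumnType("STRING", false),
                    ),
            ),
        importType = Dedupe(primaryKey = listOf(listOf("id")), cursor = listOf("updated_at")),
    )
// dedupeSchema.getPrimaryKey() == listOf(listOf("ID"))
// dedupeSchema.getCursor() == listOf("UPDATED_AT")
```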

View File

@@ -43,7 +43,22 @@ class CheckpointManagerUTest {
generationId = 10L,
minimumGenerationId = 10L,
syncId = 101L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName("test", "stream1")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
private val stream2 =
@@ -55,7 +70,22 @@ class CheckpointManagerUTest {
generationId = 10L,
minimumGenerationId = 10L,
syncId = 101L,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
io.airbyte.cdk.load.schema.model.StreamTableSchema(
tableNames =
io.airbyte.cdk.load.schema.model.TableNames(
finalTableName =
io.airbyte.cdk.load.schema.model.TableName("test", "stream2")
),
columnSchema =
io.airbyte.cdk.load.schema.model.ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
@BeforeEach

View File

@@ -8,6 +8,10 @@ import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Primary
import io.micronaut.context.annotation.Requires
@@ -18,8 +22,21 @@ import jakarta.inject.Singleton
* `@MicronautTest(environments = [ ..., MockDestinationCatalog])`.
*/
@Factory
class MockDestinationCatalogFactory : DestinationCatalogFactory {
class MockDestinationCatalogFactory {
companion object {
val tableNames = TableNames(finalTableName = TableName("test", "stream"))
val tableSchema =
StreamTableSchema(
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
tableNames = tableNames,
)
val stream1 =
DestinationStream(
unmappedNamespace = "test",
@@ -36,7 +53,8 @@ class MockDestinationCatalogFactory : DestinationCatalogFactory {
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema = tableSchema,
)
val stream2 =
DestinationStream(
@@ -54,14 +72,15 @@ class MockDestinationCatalogFactory : DestinationCatalogFactory {
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema = tableSchema,
)
}
@Singleton
@Primary
@Requires(env = ["MockDestinationCatalog"])
override fun make(): DestinationCatalog {
fun make(): DestinationCatalog {
return DestinationCatalog(streams = listOf(stream1, stream2))
}
}

View File

@@ -0,0 +1,259 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.*
import io.airbyte.cdk.load.dataflow.aggregate.AggregateFactory
import io.airbyte.cdk.load.dataflow.aggregate.StoreKey
import io.airbyte.cdk.load.dataflow.state.PartitionKey
import io.airbyte.cdk.load.dataflow.transform.RecordDTO
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.write.DestinationWriter
import java.util.UUID
import kotlinx.coroutines.runBlocking
import org.junit.jupiter.api.Assertions.*
/**
* Validates basic Micronaut DI wiring and write path functionality.
*
* Tests:
* 1. all beans are injectable - Catches missing @Singleton, circular dependencies, missing beans
* 2. writer setup completes - Validates namespace creation and status gathering
* 3. can create append stream loader - Validates StreamLoader instantiation
* 4. stream loader start creates table - Validates table creation
* 5. can write one record - Full write path validation (most important)
*
* Setup:
* 1. Add testFixtures dependency to build.gradle
* 2. Create application-component.yml with airbyte.connector.operation="write"
* 3. Provide @Primary ConfiguredAirbyteCatalog bean (use DefaultComponentTestCatalog.make())
* 4. Start database in @BeforeAll (testcontainer or real instance)
* 5. If Writer requires catalog streams: inject DestinationCatalog and override createTestStream()
*
* Troubleshooting:
* - DI errors = test working correctly, add missing beans
* - "Property doesn't exist" = missing application-component.yml
* - "Catalog must have at least one stream" = missing ConfiguredAirbyteCatalog bean
* - NullPointerException in createStreamLoader = override createTestStream()
*/
interface ConnectorWiringSuite {
// Required: Provided by connector test via Micronaut injection
val writer: DestinationWriter
val client: TableOperationsClient
val aggregateFactory: AggregateFactory
// Optional: Override to provide custom test namespace (defaults to "test")
val testNamespace: String
get() = "test"
/**
* Test: All core beans are injectable without DI errors.
*
* Validates that Micronaut can create all required beans:
* - DestinationWriter
* - TableOperationsClient
* - AggregateFactory
*
* This catches missing @Singleton annotations, circular dependencies, and missing bean
* definitions.
*/
fun `all beans are injectable`() {
assertNotNull(writer, "DestinationWriter should be injectable")
assertNotNull(client, "TableOperationsClient should be injectable")
assertNotNull(aggregateFactory, "AggregateFactory should be injectable")
}
/**
* Test: Writer.setup() executes without errors.
*
* Validates:
* - Namespace creation works
* - Initial status gathering works
* - No crashes during setup phase
*/
fun `writer setup completes`() = runBlocking {
// Should not throw
writer.setup()
}
/**
* Test: Writer can create StreamLoader for append mode.
*
* Validates:
* - Writer.createStreamLoader() returns non-null
* - StreamLoader instantiation doesn't crash
* - Append mode is supported
*/
fun `can create append stream loader`() = runBlocking {
writer.setup()
val stream = createTestStream(importType = Append)
val loader = writer.createStreamLoader(stream)
assertNotNull(loader, "StreamLoader should be created for append mode")
}
/**
* Test: StreamLoader.start() creates tables.
*
* Validates:
* - StreamLoader.start() runs without error
* - Table is created in database
* - Table can be queried
*/
fun `stream loader start creates table`() = runBlocking {
writer.setup()
val stream = createTestStream()
val tableName = TableName(testNamespace, stream.mappedDescriptor.name)
try {
val loader = writer.createStreamLoader(stream)
// Start should create table
loader.start()
// Verify table exists
assertTrue(
client.tableExists(tableName),
"Table ${tableName} should exist after StreamLoader.start()"
)
} finally {
// Cleanup
client.dropTable(tableName)
}
}
/**
* Test: Write one record using StreamLoader (validates full write path).
*
* This is the most important test - validates the complete write path:
* - Sets up writer (namespace creation, initial status gathering)
* - Creates StreamLoader (which creates table)
* - Writes one record through aggregate/buffer
* - Verifies data appears in database
*
* If this test passes, your write path works end-to-end!
*/
fun `can write one record`() = runBlocking {
writer.setup()
val stream = createTestStream()
val tableName = TableName(testNamespace, stream.mappedDescriptor.name)
try {
// 1. Create namespace
client.createNamespace(testNamespace)
// 2. Create and start StreamLoader
val loader = writer.createStreamLoader(stream)
loader.start()
// 3. Create aggregate for this stream
val key = createStoreKey(stream)
val aggregate = aggregateFactory.create(key)
// 4. Write one record
val record = createTestRecord()
aggregate.accept(record)
aggregate.flush()
// 5. Verify data in database
val count = client.countTable(tableName)
assertEquals(1L, count, "Should have exactly 1 record after write. Got $count records.")
// 6. Close loader
loader.close(hadNonzeroRecords = true, streamFailure = null)
} finally {
// Cleanup
client.dropTable(tableName)
}
}
// ========== Helper Methods ==========
/**
* Creates a minimal test stream for validation. Override this if you need custom stream
* configuration.
*/
fun createTestStream(
namespace: String = "test",
name: String = "test_stream_${UUID.randomUUID()}",
importType: io.airbyte.cdk.load.command.ImportType = Append
): DestinationStream {
return DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType = importType,
schema =
ObjectType(
properties =
linkedMapOf(
"id" to FieldType(IntegerType, nullable = false),
"name" to FieldType(StringType, nullable = true)
)
),
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = NamespaceMapper(), // Default identity mapper
tableSchema =
StreamTableSchema(
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
),
)
}
/**
* Creates a StoreKey for the given stream. Used to retrieve aggregate from factory.
*
* Note: StoreKey is a typealias for DestinationStream.Descriptor
*/
fun createStoreKey(stream: DestinationStream): StoreKey {
// StoreKey = DestinationStream.Descriptor
return stream.mappedDescriptor
}
/**
* Creates a simple column name mapping for test stream. Maps column names to themselves
* (identity mapping). Override if your database requires name transformation.
*/
fun createSimpleColumnMapping(): io.airbyte.cdk.load.table.ColumnNameMapping {
return io.airbyte.cdk.load.table.ColumnNameMapping(mapOf("id" to "id", "name" to "name"))
}
/**
* Creates a test record with all required Airbyte metadata columns. Override this if you need
* custom record structure.
*/
fun createTestRecord(): RecordDTO {
return RecordDTO(
fields =
mapOf(
// User columns
"id" to IntegerValue(1),
"name" to StringValue("Alice"),
// Airbyte metadata columns (required)
"_airbyte_raw_id" to StringValue(UUID.randomUUID().toString()),
"_airbyte_extracted_at" to TimestampWithTimezoneValue("2024-01-01T00:00:00Z"),
"_airbyte_meta" to ObjectValue(linkedMapOf()),
"_airbyte_generation_id" to IntegerValue(0)
),
partitionKey = PartitionKey(""), // Empty partition for non-partitioned streams
sizeBytes = 100,
emittedAtMs = System.currentTimeMillis()
)
}
}
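
Putting the Setup notes above together, a connector-side test might look like the sketch below. The class name, the `component` environment, constructor injection, and driving each suite function from a JUnit `@Test` wrapper are illustrative choices rather than part of the CDK contract shown here, and `TableOperationsClient` is assumed to live in the same `io.airbyte.cdk.load.component` package as the suite.

```kotlin
import io.airbyte.cdk.load.component.ConnectorWiringSuite
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.dataflow.aggregate.AggregateFactory
import io.airbyte.cdk.load.write.DestinationWriter
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import org.junit.jupiter.api.Test

// Illustrative connector-side test: the suite functions are plain interface
// methods, so each is invoked here from a JUnit @Test wrapper.
@MicronautTest(environments = ["component"])
class MyDestinationWiringTest(
    override val writer: DestinationWriter,
    override val client: TableOperationsClient,
    override val aggregateFactory: AggregateFactory,
) : ConnectorWiringSuite {

    @Test fun wiring() { `all beans are injectable`() }

    @Test fun setup() { `writer setup completes`() }

    @Test fun appendStreamLoader() { `can create append stream loader`() }

    @Test fun tableCreation() { `stream loader start creates table`() }

    @Test fun writeOneRecord() { `can write one record`() }
}
```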

View File

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.component
import com.fasterxml.jackson.databind.node.JsonNodeFactory
import io.airbyte.protocol.models.v0.AirbyteStream
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
import io.airbyte.protocol.models.v0.DestinationSyncMode
import io.airbyte.protocol.models.v0.SyncMode
/**
* Utility for creating a default ConfiguredAirbyteCatalog for component tests.
*
* Provides a catalog with schema matching ConnectorWiringSuite.createTestRecord():
* - id: integer
* - name: string
* - Airbyte metadata columns
*
* Usage in connector test config factory:
* @Singleton @Primary fun catalog() = DefaultComponentTestCatalog.make()
*/
object DefaultComponentTestCatalog {
fun make(): ConfiguredAirbyteCatalog {
val jsonNodeFactory = JsonNodeFactory.instance
val schema =
jsonNodeFactory.objectNode().apply {
put("type", "object")
set<Nothing>(
"properties",
jsonNodeFactory.objectNode().apply {
set<Nothing>(
"id",
jsonNodeFactory.objectNode().apply { put("type", "integer") }
)
set<Nothing>(
"name",
jsonNodeFactory.objectNode().apply { put("type", "string") }
)
}
)
}
val stream =
AirbyteStream()
.withName("test_stream")
.withNamespace("test")
.withJsonSchema(schema)
.withSupportedSyncModes(listOf(SyncMode.FULL_REFRESH))
.withSourceDefinedCursor(false)
.withSourceDefinedPrimaryKey(emptyList())
val configuredStream =
ConfiguredAirbyteStream()
.withStream(stream)
.withSyncMode(SyncMode.FULL_REFRESH)
.withDestinationSyncMode(DestinationSyncMode.APPEND)
.withCursorField(emptyList())
.withPrimaryKey(emptyList())
.withGenerationId(0L)
.withMinimumGenerationId(0L)
.withSyncId(42L)
return ConfiguredAirbyteCatalog().withStreams(listOf(configuredStream))
}
}
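
The usage note in the KDoc above expands to a config factory along these lines; the class name and the environment guard are placeholders, not fixed conventions.

```kotlin
import io.airbyte.cdk.load.component.DefaultComponentTestCatalog
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
import io.micronaut.context.annotation.Factory
import io.micronaut.context.annotation.Primary
import io.micronaut.context.annotation.Requires
import jakarta.inject.Singleton

// Illustrative config factory: supplies the @Primary catalog bean that the
// wiring suite's Setup notes call for.
@Factory
@Requires(env = ["component"])
class ComponentTestConfigFactory {
    @Singleton
    @Primary
    fun catalog(): ConfiguredAirbyteCatalog = DefaultComponentTestCatalog.make()
}
```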

View File

@@ -4,8 +4,6 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.AirbyteValue
@@ -30,11 +28,14 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.CDC_DELETED_AT_COLUMN
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.util.Jsons
import io.airbyte.cdk.util.invert
import java.util.UUID
import org.junit.jupiter.api.Assertions
/**
* Common test fixtures and constants used across table operations test suites. Provides reusable
@@ -44,6 +45,7 @@ object TableOperationsFixtures {
// Common field names
const val TEST_FIELD = "test"
const val ID_FIELD = "id"
const val DESCRIPTION_FIELD = "description"
// Common schemas
val TEST_INTEGER_SCHEMA = ObjectType(linkedMapOf(TEST_FIELD to FieldType(IntegerType, true)))
@@ -63,6 +65,7 @@ object TableOperationsFixtures {
ID_FIELD to FieldType(StringType, true),
TEST_FIELD to FieldType(IntegerType, true),
CDC_DELETED_AT_COLUMN to FieldType(IntegerType, true),
DESCRIPTION_FIELD to FieldType(StringType, true),
),
)
@@ -113,6 +116,7 @@ object TableOperationsFixtures {
ID_FIELD to ID_FIELD,
TEST_FIELD to TEST_FIELD,
CDC_DELETED_AT_COLUMN to CDC_DELETED_AT_COLUMN,
DESCRIPTION_FIELD to DESCRIPTION_FIELD,
),
)
@@ -198,12 +202,16 @@ object TableOperationsFixtures {
val UPSERT_SOURCE_RECORDS: List<Map<String, AirbyteValue>> =
listOf(
inputRecord(
"5499cdef-1411-4c7e-987c-b22fe1284a49",
"109d38b9-e001-4f62-86ce-4a457ab013a1",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("2"),
TEST_FIELD to IntegerValue(1001),
ID_FIELD to StringValue("0"),
TEST_FIELD to IntegerValue(1000),
DESCRIPTION_FIELD to
StringValue(
"New record, no existing record. Upsert should insert this record."
),
),
inputRecord(
"295eb05d-da91-4cf5-8d26-a2bf8b6e8ef7",
@@ -213,24 +221,22 @@ object TableOperationsFixtures {
ID_FIELD to StringValue("3"),
TEST_FIELD to IntegerValue(1002),
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
DESCRIPTION_FIELD to
StringValue(
"New deletion record with later cursor and extracted_at than existing record. Upsert should delete the existing record."
),
),
inputRecord(
"9110dcf0-2171-4daa-a934-695163950d98",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("4"),
TEST_FIELD to IntegerValue(4),
),
// There are two records with id=5, which differ only in extracted_at.
// The second record has non-null deleted_at, so we expect the record to be deleted.
inputRecord(
"35295b83-302f-49c3-af0f-cf093bc46def",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("5"),
TEST_FIELD to IntegerValue(1004),
TEST_FIELD to IntegerValue(5),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with no existing record, but there's a second incoming deletion record with later extracted_at. Upsert should discard this record."
),
),
inputRecord(
"5773cf6f-f8b7-48f2-8f23-728a4a4eb56d",
@@ -238,8 +244,155 @@ object TableOperationsFixtures {
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("5"),
TEST_FIELD to IntegerValue(1005),
TEST_FIELD to IntegerValue(5),
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
DESCRIPTION_FIELD to
StringValue("Incoming deletion record. This record should be discarded."),
),
inputRecord(
"1c4d0fc5-1e1e-4f7e-87c8-a46a722ee984",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("6"),
TEST_FIELD to IntegerValue(6),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with no existing record, but there's a second incoming record with later extracted_at. Upsert should discard this record."
),
),
inputRecord(
"2ddf5ee9-08a1-4319-824d-187d878edac5",
"2025-01-23T01:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("6"),
TEST_FIELD to IntegerValue(6),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with no existing record. Upsert should insert this record."
),
),
inputRecord(
"e8379b8f-e437-4d55-9d16-76f5e6e942d6",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("7"),
TEST_FIELD to IntegerValue(7),
CDC_DELETED_AT_COLUMN to IntegerValue(1234),
DESCRIPTION_FIELD to
StringValue(
"Incoming deletion record, but there's a second incoming record with later extracted_at. Upsert should discard this record."
),
),
inputRecord(
"e56fc753-b55a-439b-9b16-528596e2ca3a",
"2025-01-23T01:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("7"),
TEST_FIELD to IntegerValue(7),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with no existing record. Upsert should insert this record."
),
),
inputRecord(
"645efad2-f1e6-438a-b29f-15ae5d096015",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("8"),
TEST_FIELD to IntegerValue(8),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with earlier cursor and later extracted_at than existing record. Upsert should discard this record (prefer cursor over extracted_at)."
),
),
inputRecord(
"f74b8ddb-45d0-4e30-af25-66885e57a0e6",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("9"),
TEST_FIELD to IntegerValue(9),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with equal cursor and later extracted_at than existing record. Upsert should update with this record (break ties with extracted_at)."
),
),
inputRecord(
"877cceb6-23a6-4e7b-92e3-59ca46f8fd6c",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("10"),
TEST_FIELD to IntegerValue(1010),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with later cursor and later extracted_at than existing record. Upsert should update with this record."
),
),
inputRecord(
"20410b34-7bb0-4ba5-9c61-0dd23bfeee6d",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("11"),
TEST_FIELD to IntegerValue(11),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with earlier cursor and equal extracted_at than existing record. Upsert should discard this record."
),
),
inputRecord(
"70fdf9b0-ade0-4d30-9131-ba217ef506da",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("12"),
TEST_FIELD to IntegerValue(1012),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with later cursor and equal extracted_at than existing record. Upsert should update with this record."
),
),
inputRecord(
"20949d9b-8ffc-4497-85e4-cda14abc4049",
"2025-01-21T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("13"),
TEST_FIELD to IntegerValue(13),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with earlier cursor and earlier extracted_at than existing record. Upsert should discard this record."
),
),
inputRecord(
"5808a0ef-3c6d-4d9a-851c-edbbc4852e18",
"2025-01-21T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("14"),
TEST_FIELD to IntegerValue(14),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with equal cursor and earlier extracted_at than existing record. Upsert should discard this record."
),
),
inputRecord(
"373127a7-a40e-4e23-890b-1a52114686ee",
"2025-01-21T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("15"),
TEST_FIELD to IntegerValue(1015),
DESCRIPTION_FIELD to
StringValue(
"Incoming record with later cursor and earlier extracted_at than existing record. Upsert should update with this record."
),
),
)
@@ -249,7 +402,6 @@ object TableOperationsFixtures {
*/
val UPSERT_TARGET_RECORDS: List<Map<String, AirbyteValue>> =
listOf(
// id=1 has no incoming record, so it should remain untouched.
inputRecord(
"6317026e-12f9-4713-976e-ce43901bd7ce",
"2025-01-22T00:00:00Z",
@@ -257,18 +409,11 @@ object TableOperationsFixtures {
1,
ID_FIELD to StringValue("1"),
TEST_FIELD to IntegerValue(1),
DESCRIPTION_FIELD to
StringValue(
"Existing record, no incoming record. Upsert should preserve this record."
),
),
// id=2 has a normal incoming record, which will overwrite this one.
inputRecord(
"46159e3a-9bf9-42d9-8bb7-9f47d37bd663",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("2"),
TEST_FIELD to IntegerValue(2),
),
// id=3 has an incoming record with nonnull deleted_at, so this record should be
// deleted.
// TODO what about destinations with CDC soft deletes?
// https://github.com/airbytehq/airbyte-internal-issues/issues/14911
inputRecord(
@@ -278,22 +423,121 @@ object TableOperationsFixtures {
generationId = 1,
ID_FIELD to StringValue("3"),
TEST_FIELD to IntegerValue(3),
DESCRIPTION_FIELD to
StringValue(
"Existing record with incoming deletion record with later cursor and extracted_at. Upsert should delete this record."
),
),
// id=4 has an incoming record with the same cursor value (test=4) but later
// extracted_at.
// That record should replace this one.
inputRecord(
"02e22e03-587f-4d30-9718-994357407b65",
"8086bdd6-6cf5-479e-a819-e5f347373804",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("4"),
TEST_FIELD to IntegerValue(4),
ID_FIELD to StringValue("8"),
TEST_FIELD to IntegerValue(1008),
DESCRIPTION_FIELD to
StringValue(
"Existing record with later cursor and earlier extracted_at than incoming record. Upsert should preserve this record (prefer cursor over extracted_at)."
),
),
inputRecord(
"b60e8b33-32f4-4da0-934b-87d14d9ed354",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("9"),
TEST_FIELD to IntegerValue(9),
DESCRIPTION_FIELD to
StringValue(
"Existing record with equal cursor and earlier extracted_at than incoming record. Upsert should discard this record (break ties with extracted_at)."
),
),
inputRecord(
"e79d163e-b594-4016-89b9-a85e385778bd",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("10"),
TEST_FIELD to IntegerValue(10),
DESCRIPTION_FIELD to
StringValue(
"Existing record with earlier cursor and earlier extracted_at than incoming record. Upsert should discard this record."
),
),
inputRecord(
"3d345fb2-254e-4968-89a6-f896a05fb831",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("11"),
TEST_FIELD to IntegerValue(1011),
DESCRIPTION_FIELD to
StringValue(
"Existing record with later cursor and equal extracted_at than incoming record. Upsert should preserve this record."
),
),
inputRecord(
"9c5262e6-44e3-41de-9a5a-c31bc0efdb68",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("12"),
TEST_FIELD to IntegerValue(12),
DESCRIPTION_FIELD to
StringValue(
"Existing record with earlier cursor and equal extracted_at than incoming record. Upsert should discard this record."
),
),
inputRecord(
"739a9347-267b-48af-a172-2030320e2193",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("13"),
TEST_FIELD to IntegerValue(1013),
DESCRIPTION_FIELD to
StringValue(
"Existing record with later cursor and later extracted_at than incoming record. Upsert should preserve this record."
),
),
inputRecord(
"70243c59-eadb-4840-90fa-be4ed57609fc",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("14"),
TEST_FIELD to IntegerValue(14),
DESCRIPTION_FIELD to
StringValue(
"Existing record with equal cursor and later extracted_at than incoming record. Upsert should preserve this record."
),
),
inputRecord(
"966e89ec-c0d2-4358-b8e5-bf9c713f5396",
"2025-01-22T00:00:00Z",
linkedMapOf(),
generationId = 1,
ID_FIELD to StringValue("15"),
TEST_FIELD to IntegerValue(15),
DESCRIPTION_FIELD to
StringValue(
"Existing record with earlier cursor and later extracted_at than existing record. Upsert should discard this record."
),
),
)
val UPSERT_EXPECTED_RECORDS: List<Map<String, Any>> =
listOf(
outputRecord(
"109d38b9-e001-4f62-86ce-4a457ab013a1",
"2025-01-23T00:00:00Z",
linkedMapOf(),
generationId = 1L,
ID_FIELD to "0",
TEST_FIELD to 1000L,
DESCRIPTION_FIELD to
"New record, no existing record. Upsert should insert this record.",
),
outputRecord(
"6317026e-12f9-4713-976e-ce43901bd7ce",
"2025-01-22T00:00:00Z",
@@ -301,22 +545,108 @@ object TableOperationsFixtures {
generationId = 1L,
ID_FIELD to "1",
TEST_FIELD to 1L,
DESCRIPTION_FIELD to
"Existing record, no incoming record. Upsert should preserve this record.",
),
outputRecord(
"5499cdef-1411-4c7e-987c-b22fe1284a49",
"2ddf5ee9-08a1-4319-824d-187d878edac5",
"2025-01-23T01:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "6",
TEST_FIELD to 6L,
DESCRIPTION_FIELD to
"Incoming record with no existing record. Upsert should insert this record.",
),
outputRecord(
"e56fc753-b55a-439b-9b16-528596e2ca3a",
"2025-01-23T01:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "7",
TEST_FIELD to 7L,
DESCRIPTION_FIELD to
"Incoming record with no existing record. Upsert should insert this record.",
),
outputRecord(
"8086bdd6-6cf5-479e-a819-e5f347373804",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "8",
TEST_FIELD to 1008L,
DESCRIPTION_FIELD to
"Existing record with later cursor and earlier extracted_at than incoming record. Upsert should preserve this record (prefer cursor over extracted_at).",
),
outputRecord(
"f74b8ddb-45d0-4e30-af25-66885e57a0e6",
"2025-01-23T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "2",
TEST_FIELD to 1001L,
ID_FIELD to "9",
TEST_FIELD to 9L,
DESCRIPTION_FIELD to
"Incoming record with equal cursor and later extracted_at than existing record. Upsert should update with this record (break ties with extracted_at).",
),
outputRecord(
"9110dcf0-2171-4daa-a934-695163950d98",
"877cceb6-23a6-4e7b-92e3-59ca46f8fd6c",
"2025-01-23T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "4",
TEST_FIELD to 4L,
ID_FIELD to "10",
TEST_FIELD to 1010L,
DESCRIPTION_FIELD to
"Incoming record with later cursor and later extracted_at than existing record. Upsert should update with this record.",
),
outputRecord(
"3d345fb2-254e-4968-89a6-f896a05fb831",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "11",
TEST_FIELD to 1011L,
DESCRIPTION_FIELD to
"Existing record with later cursor and equal extracted_at than incoming record. Upsert should preserve this record.",
),
outputRecord(
"70fdf9b0-ade0-4d30-9131-ba217ef506da",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "12",
TEST_FIELD to 1012L,
DESCRIPTION_FIELD to
"Incoming record with later cursor and equal extracted_at than existing record. Upsert should update with this record.",
),
outputRecord(
"739a9347-267b-48af-a172-2030320e2193",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "13",
TEST_FIELD to 1013L,
DESCRIPTION_FIELD to
"Existing record with later cursor and later extracted_at than incoming record. Upsert should preserve this record.",
),
outputRecord(
"70243c59-eadb-4840-90fa-be4ed57609fc",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "14",
TEST_FIELD to 14L,
DESCRIPTION_FIELD to
"Existing record with equal cursor and later extracted_at than incoming record. Upsert should preserve this record.",
),
outputRecord(
"373127a7-a40e-4e23-890b-1a52114686ee",
"2025-01-21T00:00:00Z",
linkedMapOf(),
1L,
ID_FIELD to "15",
TEST_FIELD to 1015L,
DESCRIPTION_FIELD to
"Incoming record with later cursor and earlier extracted_at than existing record. Upsert should update with this record.",
),
)
@@ -343,10 +673,10 @@ object TableOperationsFixtures {
}
// Create common destination stream configurations
fun createAppendStream(
fun createStream(
namespace: String,
name: String,
schema: ObjectType,
tableSchema: StreamTableSchema,
generationId: Long = 1,
minimumGenerationId: Long = 0,
syncId: Long = 1,
@@ -354,40 +684,23 @@ object TableOperationsFixtures {
DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType = Append,
importType = tableSchema.importType,
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId = syncId,
schema = schema,
schema = ObjectType(LinkedHashMap(tableSchema.columnSchema.inputSchema)),
namespaceMapper = NamespaceMapper(),
tableSchema = tableSchema,
)
fun createDedupeStream(
namespace: String,
name: String,
schema: ObjectType,
primaryKey: List<List<String>>,
cursor: List<String>,
generationId: Long = 1,
minimumGenerationId: Long = 0,
syncId: Long = 1,
): DestinationStream =
DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType =
Dedupe(
primaryKey = primaryKey,
cursor = cursor,
),
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId = syncId,
schema = schema,
namespaceMapper = NamespaceMapper(),
)
fun <V> List<Map<String, V>>.sortBy(key: String) =
// sketchy unchecked cast is intentional, we're assuming that the tests are written such
// that the sort key is always comparable.
// In practice, it's generally some sort of ID column (int/string/etc.).
@Suppress("UNCHECKED_CAST") this.sortedBy { it[key] as Comparable<Any> }
fun <V> List<Map<String, V>>.sortByTestField() = this.sortedBy { it["test"] as Long }
fun <V> Map<String, V>.prettyString() =
"{" + this.entries.sortedBy { it.key }.joinToString(", ") + "}"
fun <V> List<Map<String, V>>.applyColumnNameMapping(mapping: ColumnNameMapping) =
map { record ->
@@ -398,7 +711,7 @@ object TableOperationsFixtures {
airbyteMetaColumnMapping: Map<String, String>
): List<Map<String, V>> {
val totalMapping = ColumnNameMapping(columnNameMapping + airbyteMetaColumnMapping)
return map { record -> record.mapKeys { (k, _) -> totalMapping.originalName(k) ?: k } }
return map { record -> record.mapKeys { (k, _) -> totalMapping.invert()[k] ?: k } }
}
fun <V> List<Map<String, V>>.removeNulls() =
@@ -431,6 +744,15 @@ object TableOperationsFixtures {
*pairs,
)
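// Convenience overload: fills in a random raw ID, a fixed extracted_at, empty meta, and
// generation ID 1, for fixtures where those values don't matter.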
fun inputRecord(vararg pairs: Pair<String, AirbyteValue>) =
inputRecord(
rawId = UUID.randomUUID().toString(),
extractedAt = "2025-01-23T00:00:00Z",
meta = linkedMapOf(),
generationId = 1,
pairs = pairs,
)
fun outputRecord(
rawId: String,
extractedAt: String,
@@ -445,4 +767,16 @@ object TableOperationsFixtures {
COLUMN_NAME_AB_GENERATION_ID to generationId,
*pairs,
)
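// Sorts both lists by [sortKey] and compares their pretty-printed renderings, so a mismatch
// produces a readable per-record line diff rather than one large list comparison.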
fun assertEquals(
expectedRecords: List<Map<String, Any?>>,
actualRecords: List<Map<String, Any?>>,
sortKey: String,
message: String,
) =
Assertions.assertEquals(
expectedRecords.sortBy(sortKey).joinToString("\n") { it.prettyString() },
actualRecords.sortBy(sortKey).joinToString("\n") { it.prettyString() },
message,
)
}

View File

@@ -4,10 +4,12 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
import io.airbyte.cdk.load.component.TableOperationsFixtures.sortByTestField
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.ObjectValue
@@ -18,6 +20,7 @@ import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import kotlinx.coroutines.test.runTest
@@ -48,6 +51,8 @@ interface TableOperationsSuite {
/** The database client instance to test. Must be properly configured and connected. */
val client: TableOperationsClient
val testClient: TestTableOperationsClient
val schemaFactory: TableSchemaFactory
// since ColumnNameMapping doesn't include the airbyte columns...
val airbyteMetaColumnMapping: Map<String, String>
get() = Meta.COLUMN_NAMES.associateWith { it }
@@ -84,16 +89,19 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
try {
client.createTable(
tableName = testTable,
columnNameMapping = Fixtures.TEST_MAPPING,
stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.TEST_INTEGER_SCHEMA,
tableSchema = tableSchema,
),
replace = false,
)
@@ -129,11 +137,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("insert-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
testClient.insertRecords(testTable, inputRecords, columnNameMapping)
@@ -142,7 +159,7 @@ interface TableOperationsSuite {
assertEquals(
expectedRecords,
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
resultRecords.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping),
)
} finally {
harness.cleanupTable(testTable)
@@ -174,11 +191,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("count-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
val records1 =
@@ -322,11 +348,20 @@ interface TableOperationsSuite {
val testTable = Fixtures.generateTestTableName("gen-id-test-table", testNamespace)
harness.assertTableDoesNotExist(testTable)
val tableSchema =
schemaFactory.make(testTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val stream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = tableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = testTable,
schema = Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping = columnNameMapping,
stream = stream,
)
val genId = 17L
@@ -382,18 +417,36 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
harness.assertTableDoesNotExist(targetTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
tableSchema = sourceTableSchema,
)
val targetTableSchema =
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val targetStream =
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
sourceTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = sourceTable,
columnNameMapping = columnNameMapping,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
harness.createTestTableAndVerifyExists(
targetTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = targetTable,
columnNameMapping = columnNameMapping,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -402,13 +455,14 @@ interface TableOperationsSuite {
val overwrittenTableRecords = harness.readTableWithoutMetaColumns(targetTable)
assertEquals(
expectedRecords.sortByTestField(),
overwrittenTableRecords
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
.sortByTestField(),
) {
"Expected records were not in the overwritten table."
}
expectedRecords,
overwrittenTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping,
),
"test",
"Expected records were not in the overwritten table.",
)
assert(!client.tableExists(sourceTable)) {
"Source table: ${sourceTable.namespace}.${sourceTable.name} was not dropped as expected."
@@ -453,18 +507,36 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
harness.assertTableDoesNotExist(targetTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
tableSchema = sourceTableSchema,
)
val targetTableSchema =
schemaFactory.make(targetTable, Fixtures.TEST_INTEGER_SCHEMA.properties, Append)
val targetStream =
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
sourceTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = sourceTable,
columnNameMapping = columnNameMapping,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
harness.createTestTableAndVerifyExists(
targetTable,
Fixtures.TEST_INTEGER_SCHEMA,
columnNameMapping,
tableName = targetTable,
columnNameMapping = columnNameMapping,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -473,13 +545,14 @@ interface TableOperationsSuite {
val copyTableRecords = harness.readTableWithoutMetaColumns(targetTable)
assertEquals(
expectedRecords.sortByTestField(),
copyTableRecords
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
.sortByTestField(),
) {
"Expected source records were not copied to the target table."
}
expectedRecords,
copyTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping,
),
"test",
"Expected source records were not copied to the target table.",
)
} finally {
harness.cleanupTable(sourceTable)
harness.cleanupTable(targetTable)
@@ -518,31 +591,38 @@ interface TableOperationsSuite {
harness.assertTableDoesNotExist(sourceTable)
val sourceTableSchema =
schemaFactory.make(sourceTable, Fixtures.ID_TEST_WITH_CDC_SCHEMA.properties, Append)
val sourceStream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = sourceTable.namespace,
name = sourceTable.name,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
tableSchema = sourceTableSchema,
)
val targetTable = Fixtures.generateTestTableName("upsert-test-target-table", testNamespace)
harness.assertTableDoesNotExist(targetTable)
val targetTableSchema =
schemaFactory.make(
targetTable,
Fixtures.TEST_INTEGER_SCHEMA.properties,
Dedupe(
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
cursor = listOf(Fixtures.TEST_FIELD),
),
)
val targetStream =
Fixtures.createDedupeStream(
Fixtures.createStream(
namespace = targetTable.namespace,
name = targetTable.name,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
primaryKey = listOf(listOf(Fixtures.ID_FIELD)),
cursor = listOf(Fixtures.TEST_FIELD),
tableSchema = targetTableSchema,
)
try {
harness.createTestTableAndVerifyExists(
tableName = sourceTable,
columnNameMapping = columnNameMapping,
schema = Fixtures.ID_AND_TEST_SCHEMA,
stream = sourceStream,
)
harness.insertAndVerifyRecordCount(sourceTable, sourceInputRecords, columnNameMapping)
@@ -550,7 +630,6 @@ interface TableOperationsSuite {
harness.createTestTableAndVerifyExists(
tableName = targetTable,
columnNameMapping = columnNameMapping,
schema = Fixtures.ID_TEST_WITH_CDC_SCHEMA,
stream = targetStream,
)
harness.insertAndVerifyRecordCount(targetTable, targetInputRecords, columnNameMapping)
@@ -560,13 +639,14 @@ interface TableOperationsSuite {
val upsertTableRecords = testClient.readTable(targetTable)
assertEquals(
expectedRecords.sortByTestField(),
upsertTableRecords
.reverseColumnNameMapping(columnNameMapping, airbyteMetaColumnMapping)
.sortByTestField(),
) {
"Upserted table did not contain expected records."
}
expectedRecords,
upsertTableRecords.reverseColumnNameMapping(
columnNameMapping,
airbyteMetaColumnMapping,
),
"id",
"Upserted table did not contain expected records.",
)
} finally {
harness.cleanupTable(sourceTable)
harness.cleanupTable(targetTable)

View File

@@ -5,12 +5,10 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.component.TableOperationsFixtures.createAppendStream
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.github.oshai.kotlinlogging.KotlinLogging
import org.junit.jupiter.api.Assertions.assertEquals
@@ -29,14 +27,8 @@ class TableOperationsTestHarness(
/** Creates a test table with the given configuration and verifies it was created. */
suspend fun createTestTableAndVerifyExists(
tableName: TableName,
schema: ObjectType,
columnNameMapping: ColumnNameMapping,
stream: DestinationStream =
createAppendStream(
namespace = tableName.namespace,
name = tableName.name,
schema = schema,
)
stream: DestinationStream
) {
client.createTable(
stream = stream,

View File

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
import io.airbyte.cdk.load.data.BooleanValue
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.UnknownType
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.util.Jsons
object TableSchemaEvolutionFixtures {
val ID_AND_STRING_SCHEMA =
ObjectType(
linkedMapOf(
"id" to FieldType(IntegerType, true),
"test" to FieldType(StringType, true),
),
)
val ID_AND_UNKNOWN_SCHEMA =
ObjectType(
linkedMapOf(
"id" to FieldType(IntegerType, true),
"test" to FieldType(UnknownType(Jsons.readTree("""{"type": "potato"}""")), true),
),
)
val STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS =
listOf(
inputRecord("id" to IntegerValue(1), "test" to StringValue("\"foo\"")),
inputRecord("id" to IntegerValue(2), "test" to StringValue("""{"foo": "bar"}""")),
inputRecord("id" to IntegerValue(3), "test" to StringValue("true")),
inputRecord("id" to IntegerValue(4), "test" to StringValue("0")),
inputRecord("id" to IntegerValue(5), "test" to StringValue("foo")),
)
val STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS =
listOf(
mapOf("id" to 1L, "test" to "\"foo\""),
mapOf("id" to 2L, "test" to """{"foo": "bar"}"""),
mapOf("id" to 3L, "test" to "true"),
mapOf("id" to 4L, "test" to "0"),
mapOf("id" to 5L, "test" to "foo"),
)
val UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS =
listOf(
inputRecord("id" to IntegerValue(1), "test" to StringValue("foo")),
inputRecord(
"id" to IntegerValue(2),
"test" to ObjectValue(linkedMapOf("foo" to StringValue("bar")))
),
inputRecord("id" to IntegerValue(3), "test" to BooleanValue(true)),
inputRecord("id" to IntegerValue(4), "test" to IntegerValue(0)),
)
val UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS =
listOf(
mapOf("id" to 1L, "test" to "foo"),
mapOf("id" to 2L, "test" to """{"foo":"bar"}"""),
mapOf("id" to 3L, "test" to "true"),
mapOf("id" to 4L, "test" to "0"),
)
val APPLY_CHANGESET_INITIAL_COLUMN_MAPPING =
ColumnNameMapping(
mapOf(
"id" to "id",
"updated_at" to "updated_at",
"to_retain" to "to_retain",
"to_change" to "to_change",
"to_drop" to "to_drop",
)
)
val APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING =
ColumnNameMapping(
mapOf(
"id" to "id",
"updated_at" to "updated_at",
"to_retain" to "to_retain",
"to_change" to "to_change",
"to_add" to "to_add",
)
)
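// The extracted_at value the `apply changeset` test expects to read back for the pre-existing
// record; destinations that render timestamps differently can pass their own value instead.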
val APPLY_CHANGESET_EXPECTED_EXTRACTED_AT = "2025-01-22T00:00:00Z"
}

View File

@@ -4,31 +4,32 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.ImportType
import io.airbyte.cdk.load.component.TableOperationsFixtures as Fixtures
import io.airbyte.cdk.load.component.TableOperationsFixtures.ID_FIELD
import io.airbyte.cdk.load.component.TableOperationsFixtures.TEST_FIELD
import io.airbyte.cdk.load.component.TableOperationsFixtures.assertEquals
import io.airbyte.cdk.load.component.TableOperationsFixtures.inputRecord
import io.airbyte.cdk.load.component.TableOperationsFixtures.insertRecords
import io.airbyte.cdk.load.component.TableOperationsFixtures.removeNulls
import io.airbyte.cdk.load.component.TableOperationsFixtures.reverseColumnNameMapping
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.IntegerType
import io.airbyte.cdk.load.data.IntegerValue
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.data.StringValue
import io.airbyte.cdk.load.data.TimestampWithTimezoneValue
import io.airbyte.cdk.load.message.Meta
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_EXTRACTED_AT
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_GENERATION_ID
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_META
import io.airbyte.cdk.load.message.Meta.Companion.COLUMN_NAME_AB_RAW_ID
import io.airbyte.cdk.load.schema.TableSchemaFactory
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.micronaut.test.extensions.junit5.annotation.MicronautTest
import kotlin.test.assertEquals
import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.assertAll
@@ -40,6 +41,7 @@ interface TableSchemaEvolutionSuite {
val opsClient: TableOperationsClient
val testClient: TestTableOperationsClient
val schemaFactory: TableSchemaFactory
private val harness: TableOperationsTestHarness
get() = TableOperationsTestHarness(opsClient, testClient, airbyteMetaColumnMapping)
@@ -61,11 +63,13 @@ interface TableSchemaEvolutionSuite {
) = runTest {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
val tableSchema =
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
val stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.ALL_TYPES_SCHEMA,
tableSchema = tableSchema,
)
opsClient.createNamespace(testNamespace)
@@ -97,11 +101,13 @@ interface TableSchemaEvolutionSuite {
) {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
val tableSchema =
schemaFactory.make(testTable, Fixtures.ALL_TYPES_SCHEMA.properties, Append)
val stream =
Fixtures.createAppendStream(
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
schema = Fixtures.ALL_TYPES_SCHEMA,
tableSchema = tableSchema,
)
val computedSchema = client.computeSchema(stream, columnNameMapping)
assertEquals(expectedComputedSchema, computedSchema)
@@ -309,58 +315,96 @@ interface TableSchemaEvolutionSuite {
)
}
fun `basic apply changeset`() {
`basic apply changeset`(
initialColumnNameMapping =
ColumnNameMapping(
mapOf(
"to_retain" to "to_retain",
"to_change" to "to_change",
"to_drop" to "to_drop",
)
),
modifiedColumnNameMapping =
ColumnNameMapping(
mapOf(
"to_retain" to "to_retain",
"to_change" to "to_change",
"to_add" to "to_add",
)
),
fun `apply changeset - handle sync mode append`() {
`apply changeset`(Append, Append)
}
fun `apply changeset - handle changing sync mode from append to dedup`() {
`apply changeset`(Append, Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()))
}
fun `apply changeset - handle changing sync mode from dedup to append`() {
`apply changeset`(Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()), Append)
}
fun `apply changeset - handle sync mode dedup`() {
`apply changeset`(
Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList()),
Dedupe(primaryKey = listOf(listOf("id")), cursor = emptyList())
)
}
/**
* Execute a basic set of schema changes. We're not changing the sync mode, the types are just
* Execute a basic set of schema changes, across a variety of sync modes. The types are just
* string/int (i.e. no JSON), and there are no funky characters anywhere.
*
* You should not directly annotate this function with `@Test`. Instead:
* 1. If you need to modify any of the parameters, override this function (if the defaults
*    work correctly, you can skip this step).
* 2. Annotate `@Test` onto [`apply changeset - handle sync mode append`],
*    [`apply changeset - handle sync mode dedup`], etc.
*/
fun `basic apply changeset`(
fun `apply changeset`(
initialStreamImportType: ImportType,
modifiedStreamImportType: ImportType,
) {
`apply changeset`(
TableSchemaEvolutionFixtures.APPLY_CHANGESET_INITIAL_COLUMN_MAPPING,
TableSchemaEvolutionFixtures.APPLY_CHANGESET_MODIFIED_COLUMN_MAPPING,
// If your destination reads back timestamps in a nonstandard format, you can override
// this value to match that format.
TableSchemaEvolutionFixtures.APPLY_CHANGESET_EXPECTED_EXTRACTED_AT,
initialStreamImportType,
modifiedStreamImportType,
)
}
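// Illustrative sketch only (not part of this change; the class name and member wiring are
// hypothetical): a destination-specific suite would typically expose these variants as JUnit
// tests by overriding them and delegating to the defaults, e.g.
//
//   class MyDestinationSchemaEvolutionTest : TableSchemaEvolutionSuite {
//       // ... client/testClient/schemaFactory wiring omitted ...
//
//       @Test
//       override fun `apply changeset - handle sync mode append`() {
//           super.`apply changeset - handle sync mode append`()
//       }
//
//       @Test
//       override fun `apply changeset - handle sync mode dedup`() {
//           super.`apply changeset - handle sync mode dedup`()
//       }
//   }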
fun `apply changeset`(
initialColumnNameMapping: ColumnNameMapping,
modifiedColumnNameMapping: ColumnNameMapping
modifiedColumnNameMapping: ColumnNameMapping,
expectedExtractedAt: String,
initialStreamImportType: ImportType,
modifiedStreamImportType: ImportType,
) = runTest {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
val initialSchema =
ObjectType(
linkedMapOf(
"id" to FieldType(IntegerType, true),
"updated_at" to FieldType(IntegerType, true),
"to_retain" to FieldType(StringType, true),
"to_change" to FieldType(IntegerType, true),
"to_drop" to FieldType(StringType, true),
),
)
val initialTableSchema =
schemaFactory.make(testTable, initialSchema.properties, initialStreamImportType)
val initialStream =
Fixtures.createStream(
testTable.namespace,
testTable.name,
initialTableSchema,
)
val modifiedSchema =
ObjectType(
linkedMapOf(
"id" to FieldType(IntegerType, true),
"updated_at" to FieldType(IntegerType, true),
"to_retain" to FieldType(StringType, true),
"to_change" to FieldType(StringType, true),
"to_add" to FieldType(StringType, true),
),
)
val modifiedTableSchema =
schemaFactory.make(testTable, modifiedSchema.properties, modifiedStreamImportType)
val modifiedStream =
Fixtures.createAppendStream(
namespace = testTable.namespace,
name = testTable.name,
schema = modifiedSchema,
Fixtures.createStream(
testTable.namespace,
testTable.name,
modifiedTableSchema,
)
// Create the table and compute the schema changeset
@@ -371,16 +415,20 @@ interface TableSchemaEvolutionSuite {
initialColumnNameMapping,
modifiedSchema,
modifiedColumnNameMapping,
initialStream,
modifiedStream,
)
// Insert a record before applying the changeset
testClient.insertRecords(
testTable,
initialColumnNameMapping,
mapOf(
COLUMN_NAME_AB_RAW_ID to StringValue("fcc784dd-bf06-468e-ad59-666d5aaceae8"),
COLUMN_NAME_AB_EXTRACTED_AT to TimestampWithTimezoneValue("2025-01-22T00:00:00Z"),
COLUMN_NAME_AB_META to ObjectValue(linkedMapOf()),
COLUMN_NAME_AB_GENERATION_ID to IntegerValue(1),
inputRecord(
"fcc784dd-bf06-468e-ad59-666d5aaceae8",
"2025-01-22T00:00:00Z",
linkedMapOf(),
1,
"id" to IntegerValue(1234),
"updated_at" to IntegerValue(5678),
"to_retain" to StringValue("to_retain original value"),
"to_change" to IntegerValue(42),
"to_drop" to StringValue("to_drop original value"),
@@ -395,22 +443,33 @@ interface TableSchemaEvolutionSuite {
changeset,
)
val postAlterationRecords = harness.readTableWithoutMetaColumns(testTable)
Assertions.assertEquals(
// Many destinations fully recreate the table when changing the sync mode,
// so don't use harness.readTableWithoutMetaColumns.
// We need to assert that the meta columns were preserved.
val postAlterationRecords =
testClient
.readTable(testTable)
.removeNulls()
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping)
assertEquals(
listOf(
mapOf(
"_airbyte_raw_id" to "fcc784dd-bf06-468e-ad59-666d5aaceae8",
"_airbyte_extracted_at" to expectedExtractedAt,
"_airbyte_meta" to linkedMapOf<String, Any?>(),
"_airbyte_generation_id" to 1L,
"id" to 1234L,
"updated_at" to 5678L,
"to_retain" to "to_retain original value",
// changed from int to string
"to_change" to "42",
// note the lack of `to_add` - new columns should be initialized to null
)
),
postAlterationRecords
.removeNulls()
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping),
) {
postAlterationRecords,
"id",
"Expected records were not in the overwritten table."
}
)
val postAlterationDiscoveredSchema = client.discoverSchema(testTable)
val postAlterationChangeset =
@@ -421,6 +480,68 @@ interface TableSchemaEvolutionSuite {
)
}
/**
* Test that we can alter a column from StringType to UnknownType. In many destinations, this
* poses some challenges (e.g. naively casting VARCHAR to JSON may not work as expected).
*
* See also [`change from unknown type to string type`].
*/
fun `change from string type to unknown type`() {
`change from string type to unknown type`(
Fixtures.ID_AND_TEST_MAPPING,
Fixtures.ID_AND_TEST_MAPPING,
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS,
TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_EXPECTED_RECORDS,
)
}
fun `change from string type to unknown type`(
initialColumnNameMapping: ColumnNameMapping,
modifiedColumnNameMapping: ColumnNameMapping,
inputRecords: List<Map<String, AirbyteValue>>,
expectedRecords: List<Map<String, Any?>>,
) =
executeAndVerifySchemaEvolution(
TableSchemaEvolutionFixtures.ID_AND_STRING_SCHEMA,
initialColumnNameMapping,
TableSchemaEvolutionFixtures.ID_AND_UNKNOWN_SCHEMA,
modifiedColumnNameMapping,
inputRecords,
expectedRecords,
)
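// Illustrative sketch only (names are hypothetical): a destination whose JSON/unknown column
// normalizes values on read could override the no-arg test and supply its own expected records:
//
//   @Test
//   override fun `change from string type to unknown type`() {
//       `change from string type to unknown type`(
//           Fixtures.ID_AND_TEST_MAPPING,
//           Fixtures.ID_AND_TEST_MAPPING,
//           TableSchemaEvolutionFixtures.STRING_TO_UNKNOWN_TYPE_INPUT_RECORDS,
//           myNormalizedExpectedRecords, // destination-specific expectation
//       )
//   }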
/**
* Test that we can alter a column from UnknownType to StringType. In many destinations, this
* poses some challenges (e.g. naively casting JSON to VARCHAR may not work as expected).
*
* See also [`change from string type to unknown type`].
*/
fun `change from unknown type to string type`() {
`change from unknown type to string type`(
Fixtures.ID_AND_TEST_MAPPING,
Fixtures.ID_AND_TEST_MAPPING,
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_INPUT_RECORDS,
TableSchemaEvolutionFixtures.UNKNOWN_TO_STRING_TYPE_EXPECTED_RECORDS,
)
}
fun `change from unknown type to string type`(
initialColumnNameMapping: ColumnNameMapping,
modifiedColumnNameMapping: ColumnNameMapping,
inputRecords: List<Map<String, AirbyteValue>>,
expectedRecords: List<Map<String, Any?>>,
) =
executeAndVerifySchemaEvolution(
TableSchemaEvolutionFixtures.ID_AND_UNKNOWN_SCHEMA,
initialColumnNameMapping,
TableSchemaEvolutionFixtures.ID_AND_STRING_SCHEMA,
modifiedColumnNameMapping,
inputRecords,
expectedRecords,
)
// TODO add tests for funky chars (add/drop/change type; funky chars in PK/cursor)
/**
* Utility method for a typical schema evolution test. Creates a table with [initialSchema]
* using [initialColumnNameMapping], then computes the column changeset using [modifiedSchema]
@@ -434,20 +555,19 @@ interface TableSchemaEvolutionSuite {
initialColumnNameMapping: ColumnNameMapping,
modifiedSchema: ObjectType,
modifiedColumnNameMapping: ColumnNameMapping,
initialStream: DestinationStream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = schemaFactory.make(testTable, initialSchema.properties, Append),
),
modifiedStream: DestinationStream =
Fixtures.createStream(
namespace = testTable.namespace,
name = testTable.name,
tableSchema = schemaFactory.make(testTable, modifiedSchema.properties, Append),
),
): SchemaEvolutionComputation {
val initialStream =
Fixtures.createAppendStream(
namespace = testTable.namespace,
name = testTable.name,
schema = initialSchema,
)
val modifiedStream =
Fixtures.createAppendStream(
namespace = testTable.namespace,
name = testTable.name,
schema = modifiedSchema,
)
opsClient.createNamespace(testTable.namespace)
opsClient.createTable(
tableName = testTable,
@@ -463,6 +583,57 @@ interface TableSchemaEvolutionSuite {
actualSchema,
expectedSchema,
columnChangeset,
modifiedStream,
)
}
/**
* Create a table using [initialSchema]; insert [inputRecords] to the table; execute a schema
* evolution to [modifiedSchema]; read back the table and verify that it contains
* [expectedRecords].
*
* By convention: the schemas should use the column name `id` to identify records.
*/
private fun executeAndVerifySchemaEvolution(
initialSchema: ObjectType,
initialColumnNameMapping: ColumnNameMapping,
modifiedSchema: ObjectType,
modifiedColumnNameMapping: ColumnNameMapping,
inputRecords: List<Map<String, AirbyteValue>>,
expectedRecords: List<Map<String, Any?>>,
) = runTest {
val testNamespace = Fixtures.generateTestNamespace("namespace-test")
val testTable = Fixtures.generateTestTableName("table-test-table", testNamespace)
// Create the table and compute the schema changeset
val (_, expectedSchema, changeset, modifiedStream) =
computeSchemaEvolution(
testTable,
initialSchema,
initialColumnNameMapping,
modifiedSchema,
modifiedColumnNameMapping,
)
testClient.insertRecords(testTable, inputRecords, initialColumnNameMapping)
client.applyChangeset(
modifiedStream,
modifiedColumnNameMapping,
testTable,
expectedSchema.columns,
changeset,
)
val postAlterationRecords =
harness
.readTableWithoutMetaColumns(testTable)
.reverseColumnNameMapping(modifiedColumnNameMapping, airbyteMetaColumnMapping)
assertEquals(
expectedRecords,
postAlterationRecords,
"id",
"",
)
}
@@ -470,5 +641,6 @@ interface TableSchemaEvolutionSuite {
val discoveredSchema: TableSchema,
val computedSchema: TableSchema,
val columnChangeset: ColumnChangeset,
val modifiedStream: DestinationStream,
)
}

View File

@@ -5,7 +5,7 @@
package io.airbyte.cdk.load.component
import io.airbyte.cdk.load.data.AirbyteValue
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
interface TestTableOperationsClient {
/** Tests database connectivity. */

View File

@@ -51,6 +51,10 @@ import io.airbyte.cdk.load.message.Meta.Change
import io.airbyte.cdk.load.message.Meta.Companion.CHECKPOINT_ID_NAME
import io.airbyte.cdk.load.message.Meta.Companion.CHECKPOINT_INDEX_NAME
import io.airbyte.cdk.load.message.StreamCheckpoint
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.state.CheckpointId
import io.airbyte.cdk.load.state.CheckpointIndex
import io.airbyte.cdk.load.state.CheckpointKey
@@ -380,6 +384,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val messages =
runSync(
@@ -492,6 +497,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val messages =
runSync(
@@ -668,6 +674,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream2 =
DestinationStream(
@@ -679,6 +686,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream3 =
DestinationStream(
@@ -690,6 +698,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val messages =
runSync(
@@ -1025,6 +1034,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream2 =
DestinationStream(
@@ -1036,6 +1046,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream3 =
DestinationStream(
@@ -1047,6 +1058,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val messages =
runSync(
@@ -1528,7 +1540,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val messages =
runSync(
@@ -1652,7 +1665,8 @@ abstract class BasicFunctionalityIntegrationTest(
syncId = 42,
isFileBased = true,
includeFiles = true,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val sourcePath = "path/to/file"
@@ -1744,7 +1758,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stateMessage =
runSyncUntilStateAckAndExpectFailure(
@@ -1831,7 +1846,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream1 = makeStream(randomizedNamespace + "_1")
val stream2 = makeStream(randomizedNamespace + "_2")
@@ -1936,7 +1952,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
// Catalog with some weird schemas.
// Every stream has an int `id`, and maybe some string fields.
@@ -2066,7 +2083,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -2120,7 +2138,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId,
minimumGenerationId,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream =
makeStream(
@@ -2252,7 +2271,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId,
minimumGenerationId,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream =
makeStream(
@@ -2366,7 +2386,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 41,
minimumGenerationId = 0,
syncId = 41,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
InputRecord(
@@ -2538,7 +2559,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 42,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
InputRecord(
@@ -2663,7 +2685,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 41,
minimumGenerationId = 0,
syncId = 41,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeInputRecord(id: Int, updatedAt: String, extractedAt: Long) =
InputRecord(
@@ -2847,7 +2870,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream = makeStream(syncId = 42)
runSync(
@@ -2917,7 +2941,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream =
makeStream(
@@ -2997,6 +3022,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 0,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream1 =
@@ -3075,7 +3101,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = generationId,
minimumGenerationId = minimumGenerationId,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream =
makeStream(
@@ -3199,7 +3226,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val sync1Stream = makeStream(syncId = 42)
fun makeRecord(data: String, extractedAt: Long) =
@@ -3407,7 +3435,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -3479,7 +3508,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val stream1 = makeStream("cursor1")
fun makeRecord(stream: DestinationStream, cursorName: String, emittedAtMs: Long) =
@@ -3552,6 +3582,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeRecord(stream: DestinationStream, secondPk: String, emittedAtMs: Long) =
InputRecord(
@@ -3631,7 +3662,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 42,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
}
val messages =
@@ -3689,7 +3721,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeRecord(data: String) =
InputRecord(
@@ -4167,7 +4200,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun makeRecord(data: String) =
InputRecord(
@@ -4334,7 +4368,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -4504,7 +4539,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
fun runSync() =
@@ -4690,7 +4726,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -4924,6 +4961,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 0,
syncId = 12,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -4985,7 +5023,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 42,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
runSync(
updatedConfig,
@@ -5055,7 +5094,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId = 0,
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
assertDoesNotThrow { runSync(updatedConfig, stream, messages = emptyList()) }
dumpAndDiffRecords(
@@ -5079,7 +5119,8 @@ abstract class BasicFunctionalityIntegrationTest(
generationId,
minimumGenerationId,
syncId,
namespaceMapper = namespaceMapperForMedium()
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
val firstStream = makeStream(generationId = 12, minimumGenerationId = 0, syncId = 42)
runSync(
@@ -5118,6 +5159,7 @@ abstract class BasicFunctionalityIntegrationTest(
minimumGenerationId = 1,
syncId = 42,
namespaceMapper = namespaceMapperForMedium(),
tableSchema = emptyTableSchema,
)
assertDoesNotThrow {
runSync(
@@ -5240,4 +5282,18 @@ abstract class BasicFunctionalityIntegrationTest(
NamespaceMapper(namespaceDefinitionType = NamespaceDefinitionType.SOURCE)
}
}
// This will get blown away in the tests, as the DestinationStreams we are mocking just get
// converted to the protocol, which has no concept of destination schemas.
protected val emptyTableSchema: StreamTableSchema =
StreamTableSchema(
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
)
}

View File

@@ -16,6 +16,10 @@ import io.airbyte.cdk.load.data.json.toAirbyteValue
import io.airbyte.cdk.load.message.DestinationFile
import io.airbyte.cdk.load.message.InputFile
import io.airbyte.cdk.load.message.InputRecord
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.state.CheckpointId
import io.airbyte.cdk.load.test.util.destination_process.DestinationProcess
import io.airbyte.cdk.load.util.CloseableCoroutine
@@ -60,20 +64,34 @@ class SingleStreamInsert(
primaryKey = listOf(listOf(idColumn.name)),
cursor = listOf(idColumn.name),
)
val schema =
val schemaFields =
(listOf(idColumn) + columns).map {
Pair(it.name, FieldType(type = it.type, nullable = true))
}
val streamSchema = ObjectType(linkedMapOf(*schemaFields.toTypedArray()))
DestinationStream(
unmappedNamespace = randomizedNamespace,
unmappedName = streamName,
importType = importType,
schema = ObjectType(linkedMapOf(*schema.toTypedArray())),
schema = streamSchema,
generationId = generationId,
minimumGenerationId = minGenerationId,
syncId = 1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
columnSchema =
ColumnSchema(
inputSchema = streamSchema.properties,
inputToFinalColumnNames =
streamSchema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = importType,
)
)
}
@@ -177,16 +195,29 @@ class SingleStreamFileTransfer(
) : PerformanceTestScenario {
private val log = KotlinLogging.logger {}
private val streamSchema = ObjectType(linkedMapOf())
private val stream =
DestinationStream(
unmappedNamespace = randomizedNamespace,
unmappedName = streamName,
importType = Append,
schema = ObjectType(linkedMapOf()),
schema = streamSchema,
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
override val catalog: DestinationCatalog =
@@ -201,6 +232,20 @@ class SingleStreamFileTransfer(
minimumGenerationId = 1,
syncId = 101,
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(
finalTableName = TableName(randomizedNamespace, streamName)
),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
)
)
@@ -258,17 +303,30 @@ class SingleStreamFileAndMetadataTransfer(
) : PerformanceTestScenario {
private val log = KotlinLogging.logger {}
private val streamSchema = ObjectType(linkedMapOf())
private val stream =
DestinationStream(
unmappedNamespace = randomizedNamespace,
unmappedName = streamName,
importType = Append,
schema = ObjectType(linkedMapOf()),
schema = streamSchema,
generationId = 1,
minimumGenerationId = 0,
syncId = 1,
includeFiles = true,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
override val catalog: DestinationCatalog =
@@ -283,7 +341,21 @@ class SingleStreamFileAndMetadataTransfer(
minimumGenerationId = 1,
syncId = 101,
includeFiles = true,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(
finalTableName = TableName(randomizedNamespace, streamName)
),
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
)
)
)
)
@@ -374,21 +446,36 @@ class MultiStreamInsert(
private val streams = run {
val importType = Append
val schema =
val schemaFields =
(listOf(idColumn) + columns).map {
Pair(it.name, FieldType(type = it.type, nullable = true))
}
(0 until numStreams).map {
(0 until numStreams).map { index ->
val streamSchema = ObjectType(linkedMapOf(*schemaFields.toTypedArray()))
val streamName = "${streamNamePrefix}__$index"
DestinationStream(
unmappedNamespace = randomizedNamespace,
unmappedName = "${streamNamePrefix}__$it",
unmappedName = streamName,
importType = importType,
schema = ObjectType(linkedMapOf(*schema.toTypedArray())),
schema = streamSchema,
generationId = generationId,
minimumGenerationId = minGenerationId,
syncId = 1,
namespaceMapper = NamespaceMapper()
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
tableNames =
TableNames(finalTableName = TableName(randomizedNamespace, streamName)),
columnSchema =
ColumnSchema(
inputSchema = streamSchema.properties,
inputToFinalColumnNames =
streamSchema.properties.keys.associateWith { it },
finalSchema = mapOf(),
),
importType = importType,
)
)
}
}
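The scenarios above all follow the same pattern: each `DestinationStream` now carries a `tableSchema` built from `TableNames` plus a `ColumnSchema` whose `inputToFinalColumnNames` is an identity map over the declared properties. A minimal sketch of that wiring, assuming the constructor shapes shown in this diff (import paths taken from the test imports further down; the helper name is illustrative, and a real scenario would reuse its own schema and import type):
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
// Hypothetical helper mirroring the construction pattern used by the scenarios above.
fun buildPerfTestStream(namespace: String, name: String): DestinationStream {
    val streamSchema = ObjectType(linkedMapOf("id" to FieldType(StringType, nullable = true)))
    return DestinationStream(
        unmappedNamespace = namespace,
        unmappedName = name,
        importType = Append,
        schema = streamSchema,
        generationId = 1,
        minimumGenerationId = 0,
        syncId = 1,
        namespaceMapper = NamespaceMapper(),
        tableSchema =
            StreamTableSchema(
                tableNames = TableNames(finalTableName = TableName(namespace, name)),
                columnSchema =
                    ColumnSchema(
                        inputSchema = streamSchema.properties,
                        // Identity mapping: every input column keeps its name in the final table.
                        inputToFinalColumnNames = streamSchema.properties.keys.associateWith { it },
                        finalSchema = mapOf(),
                    ),
                importType = Append,
            ),
    )
}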

View File

@@ -0,0 +1,151 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.write
import io.airbyte.cdk.command.ConfigurationSpecification
import io.airbyte.cdk.load.test.util.FakeDataDumper
import io.airbyte.cdk.load.test.util.IntegrationTest
import io.airbyte.cdk.load.test.util.NoopDestinationCleaner
import io.airbyte.cdk.load.test.util.NoopExpectedRecordMapper
import jakarta.inject.Inject
import org.junit.jupiter.api.Assertions.assertNotNull
import org.junit.jupiter.api.Test
/**
* Validates write operation can initialize with real catalog loading.
*
* Tests that all beans required for catalog processing exist:
* - RawTableNameGenerator, FinalTableNameGenerator, ColumnNameGenerator
* - ColumnNameMapper, TableCatalog factory dependencies
*
* Complements ConnectorWiringSuite:
* - ConnectorWiringSuite: Fast component test, validates write path
* - WriteInitializationTest: Integration test, validates catalog loading
*
* Usage: class MyWriteInitTest : WriteInitializationTest<MySpecification>(
* ```
* configContents = File("secrets/config.json").readText(),
* configSpecClass = MySpecification::class.java,
* ```
* )
*
* Troubleshooting:
* - DI errors = missing bean (add to BeanFactory or mark @Singleton)
* - File not found = create secrets/config.json with valid credentials
*/
abstract class WriteInitializationTest<T : ConfigurationSpecification>(
val configContents: String,
val configSpecClass: Class<T>,
additionalMicronautEnvs: List<String> = emptyList(),
) :
IntegrationTest(
additionalMicronautEnvs = additionalMicronautEnvs,
dataDumper = FakeDataDumper,
destinationCleaner = NoopDestinationCleaner,
recordMangler = NoopExpectedRecordMapper,
) {
@Inject lateinit var writer: DestinationWriter
/**
* Validates all beans for catalog loading exist.
*
* Creates write process with real catalog to ensure:
* - DestinationCatalog can be created from catalog JSON
* - TableCatalog factory can create catalog with name generators
* - DestinationWriter can be instantiated
*
* DI errors here = missing beans (same errors that would crash Docker runtime).
*/
@Test
fun `writer can be instantiated with real catalog`() {
// Create minimal catalog for testing (with all required fields)
val catalog =
io.airbyte.protocol.models.v0
.ConfiguredAirbyteCatalog()
.withStreams(
listOf(
io.airbyte.protocol.models.v0
.ConfiguredAirbyteStream()
.withStream(
io.airbyte.protocol.models.v0
.AirbyteStream()
.withName("write_init_test")
.withNamespace("test")
.withJsonSchema(
com.fasterxml.jackson.databind.node.JsonNodeFactory.instance
.objectNode()
.put("type", "object")
.set(
"properties",
com.fasterxml.jackson.databind.node.JsonNodeFactory
.instance
.objectNode()
.set(
"id",
com.fasterxml.jackson.databind.node
.JsonNodeFactory
.instance
.objectNode()
.put("type", "integer")
)
)
)
)
.withSyncMode(io.airbyte.protocol.models.v0.SyncMode.FULL_REFRESH)
.withDestinationSyncMode(
io.airbyte.protocol.models.v0.DestinationSyncMode.APPEND
)
.withGenerationId(0L)
.withMinimumGenerationId(0L)
.withSyncId(42L)
)
)
// Just CREATE the process - DI will fail if beans are missing
// We don't actually RUN it (that would hang waiting for stdin)
try {
val process =
destinationProcessFactory.createDestinationProcess(
command = "write",
configContents = configContents,
catalog = catalog,
)
// If we get here, DI succeeded!
// Process was created without bean instantiation errors
assertNotNull(
process,
"Write process should be created successfully. " +
"DI initialization passed - all required beans exist."
)
} catch (e: Exception) {
// Check if it's a DI error (blocker) vs other error
val message = e.message ?: ""
val cause = e.cause?.message ?: ""
if (
message.contains("BeanInstantiationException") ||
message.contains("Failed to inject") ||
message.contains("No bean of type") ||
cause.contains("BeanInstantiationException") ||
cause.contains("Failed to inject") ||
cause.contains("No bean of type")
) {
throw AssertionError(
"Write operation failed to initialize due to DI error. " +
"This means required beans are missing. " +
"Check for: RawTableNameGenerator, FinalTableNameGenerator, " +
"ColumnNameGenerator, ColumnNameMapper, Writer, " +
"AggregatePublishingConfig. " +
"Original error: $message",
e
)
}
// Re-throw other unexpected errors
throw e
}
}
}
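A hedged, concrete version of the usage shown in the KDoc above, assuming a hypothetical `MySpecification` class and a local `secrets/config.json`:
class MyDestinationWriteInitializationTest :
    WriteInitializationTest<MySpecification>(
        configContents = java.io.File("secrets/config.json").readText(),
        configSpecClass = MySpecification::class.java,
    )
JUnit picks up the inherited `writer can be instantiated with real catalog` test automatically, so the subclass needs no body of its own.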

View File

@@ -280,8 +280,9 @@ class CdcPartitionReader<T : Comparable<T>>(
val event = DebeziumEvent(changeEvent)
val eventType: EventType = emitRecord(event)
// Update counters.
updateCounters(event, eventType)
if (!engineShuttingDown.get()) {
updateCounters(event, eventType)
}
// Look for reasons to close down the engine.
val closeReason: CloseReason = findCloseReason(event, eventType) ?: return
// At this point, if we haven't returned already, we want to close down the engine.
@@ -341,6 +342,7 @@ class CdcPartitionReader<T : Comparable<T>>(
true ->
runBlocking(Dispatchers.IO) {
recordAcceptor.invoke(deserializedRecord.data, deserializedRecord.changes)
updateCounters(event, EventType.RECORD_EMITTED)
}
// While the engine is running normally, we can emit records synchronously for
// better performance.

View File

@@ -5,6 +5,8 @@ import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ObjectNode
import io.airbyte.cdk.command.JdbcSourceConfiguration
import io.airbyte.cdk.command.OpaqueStateValue
import io.airbyte.cdk.output.DataChannelMedium.SOCKET
import io.airbyte.cdk.output.DataChannelMedium.STDIO
import io.airbyte.cdk.output.sockets.toJson
import io.airbyte.cdk.util.Jsons
import io.github.oshai.kotlinlogging.KotlinLogging
@@ -39,7 +41,14 @@ abstract class JdbcPartitionsCreator<
override suspend fun run() {}
override fun checkpoint(): PartitionReadCheckpoint =
PartitionReadCheckpoint(partition.completeState, 0)
PartitionReadCheckpoint(
partition.completeState,
0,
when (streamState.streamFeedBootstrap.dataChannelMedium) {
SOCKET -> generatePartitionId(4)
STDIO -> null
}
)
override fun releaseResources() {}
}

View File

@@ -6,8 +6,8 @@ package io.airbyte.cdk.load.orchestration.db.direct_load_table
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.Sql
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
interface DirectLoadSqlGenerator {
fun createTable(

View File

@@ -2,19 +2,26 @@
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.direct_load_table
package io.airbyte.cdk.load.direct_load_table
import io.airbyte.cdk.SystemErrorException
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.Dedupe
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.Overwrite
import io.airbyte.cdk.load.component.TableOperationsClient
import io.airbyte.cdk.load.component.TableSchemaEvolutionClient
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.orchestration.db.TempTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalog
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.table.TempTableNameGenerator
import io.airbyte.cdk.load.table.directload.DirectLoadInitialStatus
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableAppendTruncateStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableDedupTruncateStreamLoader
import io.airbyte.cdk.load.table.directload.DirectLoadTableExecutionConfig
import io.airbyte.cdk.load.write.DestinationWriter
import io.airbyte.cdk.load.write.StreamLoader
import io.airbyte.cdk.load.write.StreamStateStore
@@ -26,7 +33,7 @@ import io.airbyte.cdk.load.write.StreamStateStore
*/
class DirectLoadTableWriter(
private val internalNamespace: String,
private val names: TableCatalog,
private val names: DestinationCatalog,
private val stateGatherer: DatabaseInitialStatusGatherer<DirectLoadInitialStatus>,
private val destinationHandler: DatabaseHandler,
private val schemaEvolutionClient: TableSchemaEvolutionClient,
@@ -36,19 +43,18 @@ class DirectLoadTableWriter(
) : DestinationWriter {
private lateinit var initialStatuses: Map<DestinationStream, DirectLoadInitialStatus>
override suspend fun setup() {
val namespaces =
names.values.map { (tableNames, _) -> tableNames.finalTableName!!.namespace }.toSet()
val namespaces = names.streams.map { it.tableSchema.tableNames.finalTableName!!.namespace }
destinationHandler.createNamespaces(namespaces + listOf(internalNamespace))
initialStatuses = stateGatherer.gatherInitialStatus(names)
initialStatuses = stateGatherer.gatherInitialStatus()
}
override fun createStreamLoader(stream: DestinationStream): StreamLoader {
val initialStatus = initialStatuses[stream]!!
val tableNameInfo = names[stream]!!
val realTableName = tableNameInfo.tableNames.finalTableName!!
val tempTableName = tempTableNameGenerator.generate(realTableName)
val columnNameMapping = tableNameInfo.columnNameMapping
val realTableName = stream.tableSchema.tableNames.finalTableName!!
val tempTableName = stream.tableSchema.tableNames.tempTableName!!
val columnNameMapping =
ColumnNameMapping(stream.tableSchema.columnSchema.inputToFinalColumnNames)
return when (stream.minimumGenerationId) {
0L ->
when (stream.importType) {

View File

@@ -1,19 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
data class AlterTableReport(
val columnsToAdd: Set<String>,
val columnsToRemove: Set<String>,
val columnsToChangeType: Set<String>,
) {
/**
* A no-op for an AlterTableReport is when the existing table matches the expected schema
*
* @return whether the schema matches
*/
val isNoOp =
columnsToAdd.isEmpty() && columnsToRemove.isEmpty() && columnsToChangeType.isEmpty()
}

View File

@@ -1,260 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.RawTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.TableNames
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.github.oshai.kotlinlogging.KotlinLogging
import io.micronaut.context.annotation.Factory
import javax.inject.Singleton
import org.apache.commons.codec.digest.DigestUtils
private val LOGGER = KotlinLogging.logger {}
const val DEFAULT_AIRBYTE_INTERNAL_NAMESPACE = "airbyte_internal"
data class TableNameInfo(val tableNames: TableNames, val columnNameMapping: ColumnNameMapping)
data class TableCatalog(private val catalog: Map<DestinationStream, TableNameInfo>) :
Map<DestinationStream, TableNameInfo> by catalog {
fun getMappedColumnName(stream: DestinationStream, colName: String): String? =
this[stream]?.columnNameMapping?.get(colName)
}
data class TableCatalogByDescriptor(
private val catalog: Map<DestinationStream.Descriptor, TableNameInfo>
) : Map<DestinationStream.Descriptor, TableNameInfo> by catalog {
fun getFinalTableName(desc: DestinationStream.Descriptor): TableName? =
this[desc]?.tableNames?.finalTableName
}
@Factory
class TableCatalogFactory {
@Singleton
fun getTableCatalog(
catalog: DestinationCatalog,
// Raw table generator is optional. Direct-load destinations don't need it
// (unless they were previously T+D destinations, in which case it's still required
// so that we maintain stable names with the T+D version)
rawTableNameGenerator: RawTableNameGenerator?,
finalTableNameGenerator: FinalTableNameGenerator,
finalTableColumnNameGenerator: ColumnNameGenerator,
): TableCatalog {
val processedRawTableNames =
if (rawTableNameGenerator != null) {
mutableSetOf<TableName>()
} else {
null
}
val processedFinalTableNames = mutableSetOf<TableName>()
val result = mutableMapOf<DestinationStream, TableNameInfo>()
catalog.streams.forEach { stream ->
val originalRawTableName = rawTableNameGenerator?.getTableName(stream.mappedDescriptor)
val originalFinalTableName =
finalTableNameGenerator.getTableName(stream.mappedDescriptor)
val currentRawProcessedName: TableName?
val currentFinalProcessedName: TableName
val rawTableNameColliding =
processedRawTableNames?.let { originalRawTableName in it } ?: false
val finalTableNameColliding = originalFinalTableName in processedFinalTableNames
if (rawTableNameColliding || finalTableNameColliding) {
LOGGER.info {
"Detected table name collision for ${stream.mappedDescriptor.namespace}.${stream.mappedDescriptor.name}"
}
// Create a hash-suffixed name to avoid collision
val hash =
DigestUtils.sha1Hex(
"${originalFinalTableName.namespace}&airbyte&${stream.mappedDescriptor.name}"
)
.substring(0, 3)
val newName = "${stream.mappedDescriptor.name}_$hash"
currentRawProcessedName =
rawTableNameGenerator?.getTableName(
stream.mappedDescriptor.copy(name = newName)
)
processedRawTableNames?.add(currentRawProcessedName!!)
currentFinalProcessedName =
finalTableNameGenerator.getTableName(
stream.mappedDescriptor.copy(name = newName)
)
processedFinalTableNames.add(currentFinalProcessedName)
} else {
processedRawTableNames?.add(originalRawTableName!!)
processedFinalTableNames.add(originalFinalTableName)
currentRawProcessedName = originalRawTableName
currentFinalProcessedName = originalFinalTableName
}
// Create column name mapping with collision handling
val columnNameMapping = createColumnNameMapping(stream, finalTableColumnNameGenerator)
result[stream] =
TableNameInfo(
TableNames(
rawTableName = currentRawProcessedName,
finalTableName = currentFinalProcessedName,
),
columnNameMapping
)
}
return TableCatalog(result)
}
/**
* Creates column name mapping with handling for potential collisions using incremental
* numbering, with advanced resolution for truncation cases.
*/
private fun createColumnNameMapping(
stream: DestinationStream,
finalTableColumnNameGenerator: ColumnNameGenerator,
): ColumnNameMapping {
val processedColumnNames = mutableSetOf<ColumnNameGenerator.ColumnName>()
val columnMappings = mutableMapOf<String, String>()
// Map to track original column names by their truncated versions
stream.schema.asColumns().forEach { (columnName, _) ->
val processedColumnName = finalTableColumnNameGenerator.getColumnName(columnName)
// Get a unique column name by adding incremental numbers if necessary
val finalColumnName =
resolveColumnNameCollision(
stream,
processedColumnName,
existingNames = processedColumnNames,
originalColumnName = columnName,
finalTableColumnNameGenerator,
)
processedColumnNames.add(finalColumnName)
columnMappings[columnName] = finalColumnName.displayName
}
return ColumnNameMapping(columnMappings)
}
/**
* Resolves column name collisions by first trying incremental suffixes (_1, _2, etc.) If that
* doesn't work due to name truncation, uses the more powerful superResolveColumnCollisions.
*
* @param processedName The name after initial processing by the column name generator
* @param existingNames Set of names already used for other columns
* @param originalColumnName The original column name before processing
*/
private fun resolveColumnNameCollision(
stream: DestinationStream,
processedName: ColumnNameGenerator.ColumnName,
existingNames: Set<ColumnNameGenerator.ColumnName>,
originalColumnName: String,
finalTableColumnNameGenerator: ColumnNameGenerator,
): ColumnNameGenerator.ColumnName {
// If processed name is unique, use it
if (!existingNames.hasConflict(processedName)) {
return processedName
}
LOGGER.info {
"Detected column name collision for ${stream.mappedDescriptor.namespace}.${stream.mappedDescriptor.name}.$originalColumnName"
}
// Try adding incremental suffixes until we find a non-colliding name
var counter = 1
var candidateName: ColumnNameGenerator.ColumnName
var previousCandidate = processedName
do {
// Generate candidate name by adding numeric suffix
candidateName =
finalTableColumnNameGenerator.getColumnName("${originalColumnName}_$counter")
// Check if we're making progress (detecting potential truncation)
if (candidateName.canonicalName == previousCandidate.canonicalName) {
// We're not making progress, likely due to name truncation
// Use the more powerful resolution method with the ORIGINAL column name
return superResolveColumnCollisions(
originalColumnName,
existingNames,
processedName.canonicalName.length,
finalTableColumnNameGenerator,
)
}
previousCandidate = candidateName
counter++
} while (existingNames.hasConflict(candidateName))
return candidateName
}
/**
* Generates a name of the format `<prefix><length><suffix>` when simple suffix-based conflict
* resolution fails due to name truncation. E.g. for affixLength=3: "veryLongName" -> "ver6ame"
*
* @param originalName The original column name that caused collision
* @param existingNames Set of existing column names to avoid collision with
* @param maximumColumnNameLength The maximum allowed length for the column name
*/
private fun superResolveColumnCollisions(
originalName: String,
existingNames: Set<ColumnNameGenerator.ColumnName>,
maximumColumnNameLength: Int,
finalTableColumnNameGenerator: ColumnNameGenerator,
): ColumnNameGenerator.ColumnName {
// Assume that the <length> portion can be expressed in at most 5 characters.
// If someone is giving us a column name that's longer than 99999 characters,
// that's just being silly.
val affixLength = (maximumColumnNameLength - 5) / 2
// If, after reserving 5 characters for the length, we can't fit the affixes,
// just give up. That means the destination is trying to restrict us to a
// 6-character column name, which is just silly.
if (affixLength <= 0) {
throw IllegalArgumentException(
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing."
)
}
val prefix = originalName.substring(0, affixLength)
val suffix = originalName.substring(originalName.length - affixLength, originalName.length)
val length = originalName.length - 2 * affixLength
val newColumnName = finalTableColumnNameGenerator.getColumnName("$prefix$length$suffix")
// If there's still a collision after this, just give up.
// We could try to be more clever, but this is already a pretty rare case.
if (existingNames.hasConflict(newColumnName)) {
throw IllegalArgumentException(
"Cannot solve column name collision: $originalName. We recommend removing this column to continue syncing."
)
}
return newColumnName
}
@Singleton
fun getTableCatalogByDescriptor(map: TableCatalog): TableCatalogByDescriptor {
return TableCatalogByDescriptor(map.mapKeys { (k, _) -> k.mappedDescriptor })
}
}
/**
* can't just use `.contains()`, because we don't care whether the column names have the same
* display name. We only care about the canonical name.
*
* (arguably we could override equals/hashcode? But that would make writing tests more difficult,
* because it's not an intuitive behavior)
*/
private fun Collection<ColumnNameGenerator.ColumnName>.hasConflict(
candidate: ColumnNameGenerator.ColumnName
) = this.any { it.canonicalName == candidate.canonicalName }
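The hash-suffix rename used for table name collisions above is easy to reproduce in isolation. A minimal sketch, assuming only `commons-codec` on the classpath (the helper name is illustrative; the factory inlines this logic):
import org.apache.commons.codec.digest.DigestUtils
// Mirrors the collision branch above: the colliding stream is renamed to
// "<name>_<first three hex chars of sha1(namespace&airbyte&name)>" before the
// table name generators are invoked again.
fun collisionSuffixedName(finalTableNamespace: String, streamName: String): String {
    val hash = DigestUtils.sha1Hex("$finalTableNamespace&airbyte&$streamName").substring(0, 3)
    return "${streamName}_$hash"
}
// For example, TableCatalogFactoryTest further down expects the colliding stream
// ("a", "foofoo") to end up as "foofoo_3fd".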

View File

@@ -1,54 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatus
import java.time.Instant
data class TypingDedupingDatabaseInitialStatus(
/** Initial status of the final table, or null if the table doesn't exist yet. */
val finalTableStatus: FinalTableInitialStatus?,
val rawTableStatus: RawTableInitialStatus?,
val tempRawTableStatus: RawTableInitialStatus?,
) : DatabaseInitialStatus
data class FinalTableInitialStatus(
val isSchemaMismatch: Boolean,
val isEmpty: Boolean,
/** The generation ID of _any_ record from the final table, or `null` if the table is empty. */
val finalTableGenerationId: Long?,
)
data class RawTableInitialStatus(
/**
* Whether there were any records with null `_airbyte_loaded_at`, at the time that this status
* was fetched.
*/
val hasUnprocessedRecords: Boolean,
/**
* The highest timestamp such that all records in `SELECT * FROM raw_table WHERE
* _airbyte_extracted_at <= ?` have a nonnull `_airbyte_loaded_at`.
*
* Destinations MAY use this value to only run T+D on records with `_airbyte_extracted_at > ?`
* (note the strictly-greater comparison).
*/
val maxProcessedTimestamp: Instant?,
) {
companion object {
/**
* If the raw table doesn't exist, we'll obviously need to create it. After creating a raw
* table, this is its default state (i.e. it has no records, so there are by definition no
* unprocessed records, and no processed records).
*/
val emptyTableStatus = RawTableInitialStatus(false, maxProcessedTimestamp = null)
}
}
/**
* Many callers need to do a `create table if not exists`. This is a utility method to update the
* initial status accordingly - i.e. if the table already existed, retain its status; otherwise, use
* the empty table status.
*/
fun RawTableInitialStatus?.reify() = this ?: RawTableInitialStatus.emptyTableStatus
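A tiny sketch of how `reify()` is intended to be used after a `create table if not exists`, assuming `initialStatus` is a `TypingDedupingDatabaseInitialStatus` as in the stream loader further down:
// If the gatherer found no raw table, rawTableStatus is null; after creating the
// (empty) table we can safely treat it as the empty-table status.
val effectiveRawTableStatus: RawTableInitialStatus = initialStatus.rawTableStatus.reify()
// -> RawTableInitialStatus(hasUnprocessedRecords = false, maxProcessedTimestamp = null)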

View File

@@ -1,9 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
data class TypingDedupingExecutionConfig(
val rawTableSuffix: String,
)

View File

@@ -1,136 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
import io.airbyte.cdk.load.orchestration.db.TableNames
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.table.TableSuffixes.SOFT_RESET_SUFFIX
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Instant
private val logger = KotlinLogging.logger {}
class TypingDedupingFinalTableOperations(
private val sqlGenerator: TypingDedupingSqlGenerator,
private val databaseHandler: DatabaseHandler,
) {
fun createFinalTable(
stream: DestinationStream,
finalTableName: TableName,
columnNameMapping: ColumnNameMapping,
finalTableSuffix: String,
replace: Boolean
) {
logger.info {
"Creating final table for stream ${stream.mappedDescriptor.toPrettyString()} with name ${finalTableName.toPrettyString()}"
}
databaseHandler.execute(
sqlGenerator.createFinalTable(
stream,
finalTableName,
columnNameMapping,
finalTableSuffix,
replace = replace
)
)
}
/** Reset the final table using a temp table or ALTER existing table's columns. */
fun softResetFinalTable(
stream: DestinationStream,
tableNames: TableNames,
columnNameMapping: ColumnNameMapping,
) {
logger.info {
"Executing soft reset for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()}"
}
databaseHandler.execute(
sqlGenerator.prepareTablesForSoftReset(stream, tableNames, columnNameMapping)
)
typeAndDedupe(
stream,
tableNames,
columnNameMapping,
maxProcessedTimestamp = null,
finalTableSuffix = SOFT_RESET_SUFFIX,
)
databaseHandler.execute(
sqlGenerator.overwriteFinalTable(
stream,
tableNames.finalTableName!!,
finalTableSuffix = SOFT_RESET_SUFFIX
)
)
}
/**
* Attempt to atomically swap the final table in from the temp version. This could be
* destination-specific: e.g. `INSERT INTO ... SELECT *` then `DROP TABLE`, or
* `CREATE OR REPLACE ... SELECT *` then `DROP TABLE`.
*/
fun overwriteFinalTable(
stream: DestinationStream,
finalTableName: TableName,
finalTableSuffix: String,
) {
logger.info {
"Overwriting final table for stream ${stream.mappedDescriptor.toPrettyString()} with name ${finalTableName.toPrettyString()} using temp table with suffix $finalTableSuffix"
}
databaseHandler.execute(
sqlGenerator.overwriteFinalTable(
stream,
finalTableName,
finalTableSuffix = finalTableSuffix
)
)
}
fun typeAndDedupe(
stream: DestinationStream,
tableNames: TableNames,
columnNameMapping: ColumnNameMapping,
maxProcessedTimestamp: Instant?,
finalTableSuffix: String
) {
try {
logger.info {
"Attempting typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix"
}
val unsafeSql =
sqlGenerator.updateFinalTable(
stream,
tableNames,
columnNameMapping,
finalTableSuffix = finalTableSuffix,
maxProcessedTimestamp = maxProcessedTimestamp,
useExpensiveSaferCasting = false,
)
databaseHandler.execute(unsafeSql)
} catch (e: Exception) {
if (sqlGenerator.supportsExpensiveSaferCasting) {
logger.info(e) {
"Encountered Exception on unsafe SQL for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix, re-attempting with error handling"
}
val saferSql =
sqlGenerator.updateFinalTable(
stream,
tableNames,
columnNameMapping,
finalTableSuffix = finalTableSuffix,
maxProcessedTimestamp = maxProcessedTimestamp,
useExpensiveSaferCasting = true,
)
databaseHandler.execute(saferSql)
} else {
logger.info(e) {
"Encountered Exception on unsafe SQL for stream ${stream.mappedDescriptor.toPrettyString()} on tables ${tableNames.toPrettyString()} with suffix $finalTableSuffix, not retrying"
}
throw e
}
}
}
}

View File

@@ -1,42 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.table.TableName
interface TypingDedupingRawTableOperations {
/**
* Prepare the raw table, including any associated blob storage. Similar to [createFinalTable],
* accepts a [suffix] parameter, which should be used in conjunction with [overwriteRawTable].
*
* @param replace If true, then replace existing resources with empty e.g. tables. If false,
* then leave existing resources untouched.
*/
fun prepareRawTable(rawTableName: TableName, suffix: String, replace: Boolean = false)
/**
* Swap the "temporary" raw table into the "real" raw table. For example, `DROP TABLE IF NOT
* EXISTS airbyte_internal.foo; ALTER TABLE airbyte_internal.foo_tmp RENAME TO foo`.
*/
fun overwriteRawTable(rawTableName: TableName, suffix: String)
/**
* Copy all records from the temporary raw table into the real raw table, then drop the
* temporary raw table. For example `INSERT INTO airbyte_internal.foo SELECT * FROM
* airbyte_internal.foo_tmp; DROP TABLE airbyte_internal.foo_tmp`.
*/
fun transferFromTempRawTable(rawTableName: TableName, suffix: String)
/**
* Get the generation of a single record in the raw table. Not necessarily the min or max
* generation, just _any_ record.
*
* [TypingDedupingStreamLoader] is responsible for orchestrating the raw tables so that the temp
* raw table always contains exactly one generation.
*
* @return The generation ID of a record in the raw table, or `null` if the raw table is empty.
*/
fun getRawTableGeneration(rawTableName: TableName, suffix: String): Long?
}
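The KDoc above spells each operation out as plain SQL. A hedged sketch of what a JDBC-style implementation could look like, assuming a generic `execute(sql)` helper and the standard raw-table columns; names, quoting, and column types are illustrative rather than any specific connector's implementation:
import io.airbyte.cdk.load.table.TableName
class SqlRawTableOperations(
    private val execute: (String) -> Unit, // hypothetical SQL executor
) : TypingDedupingRawTableOperations {
    private fun qualified(t: TableName, suffix: String) = "${t.namespace}.${t.name}$suffix"
    override fun prepareRawTable(rawTableName: TableName, suffix: String, replace: Boolean) {
        if (replace) execute("DROP TABLE IF EXISTS ${qualified(rawTableName, suffix)}")
        execute(
            "CREATE TABLE IF NOT EXISTS ${qualified(rawTableName, suffix)} (" +
                "_airbyte_raw_id VARCHAR, _airbyte_data VARCHAR, _airbyte_extracted_at TIMESTAMP, " +
                "_airbyte_loaded_at TIMESTAMP, _airbyte_meta VARCHAR, _airbyte_generation_id BIGINT)"
        )
    }
    override fun overwriteRawTable(rawTableName: TableName, suffix: String) {
        execute("DROP TABLE IF EXISTS ${qualified(rawTableName, "")}")
        execute("ALTER TABLE ${qualified(rawTableName, suffix)} RENAME TO ${rawTableName.name}")
    }
    override fun transferFromTempRawTable(rawTableName: TableName, suffix: String) {
        execute(
            "INSERT INTO ${qualified(rawTableName, "")} SELECT * FROM ${qualified(rawTableName, suffix)}"
        )
        execute("DROP TABLE ${qualified(rawTableName, suffix)}")
    }
    override fun getRawTableGeneration(rawTableName: TableName, suffix: String): Long? {
        // A real implementation would run something like
        // "SELECT _airbyte_generation_id FROM <table> LIMIT 1" and return null for an empty table.
        return null
    }
}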

View File

@@ -1,145 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.Sql
import io.airbyte.cdk.load.orchestration.db.TableNames
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.table.TableSuffixes.SOFT_RESET_SUFFIX
import java.time.Instant
interface TypingDedupingSqlGenerator {
/**
* Generate a SQL statement to create a fresh table to match the given stream.
*
* The generated SQL should throw an exception if the table already exists and `replace` is
* false.
*
* @param finalTableSuffix A suffix to add to the stream name. Useful for full refresh overwrite
* syncs, where we write the entire sync to a temp table.
* @param replace If true, will overwrite an existing table. If false, will throw an exception
* if the table already exists. If you're passing a non-empty prefix, you likely want to set
* this to true.
*/
fun createFinalTable(
stream: DestinationStream,
tableName: TableName,
columnNameMapping: ColumnNameMapping,
finalTableSuffix: String,
replace: Boolean
): Sql
/**
* Whether [updateFinalTable] actually generates different SQL when `useExpensiveSaferCasting`
* is enabled. Some destinations don't have this distinction, and should override this field to
* `false`.
*/
val supportsExpensiveSaferCasting: Boolean
get() = true
/**
* Generate a SQL statement to copy new data from the raw table into the final table.
*
* Responsible for:
*
* * Pulling new raw records from a table (i.e. records with null _airbyte_loaded_at)
* * Extracting the JSON fields and casting to the appropriate types
* * Handling errors in those casts
* * Merging those typed records into an existing table
* * Updating the raw records with SET _airbyte_loaded_at = now()
*
* Implementing classes are recommended to break this into smaller methods, which can be tested
* in isolation. However, this interface only requires a single mega-method.
*
* @param finalTableSuffix the suffix of the final table to write to. If empty string, writes to
* the final table directly. Useful for full refresh overwrite syncs, where we write the entire
* sync to a temp table and then swap it into the final table at the end.
*
* @param maxProcessedTimestamp The latest _airbyte_extracted_at for which all raw records with
* that timestamp have already been typed+deduped. Implementations MAY use this value in an
* `_airbyte_extracted_at > maxProcessedTimestamp` filter on the raw table to improve query
* performance.
* @param useExpensiveSaferCasting Often the data coming from the source can be faithfully
* represented in the destination, and a plain "CAST" expression works fine. Sometimes, however,
* we get badly typed data; in those cases we can use a more expensive query that handles
* casting exceptions.
*/
fun updateFinalTable(
stream: DestinationStream,
tableNames: TableNames,
columnNameMapping: ColumnNameMapping,
finalTableSuffix: String,
maxProcessedTimestamp: Instant?,
useExpensiveSaferCasting: Boolean,
): Sql
/**
* Drop the previous final table, and rename the new final table to match the old final table.
*
* This method may assume that the stream is an OVERWRITE stream, and that the final suffix is
* non-empty. Callers are responsible for verifying those are true.
*/
fun overwriteFinalTable(
stream: DestinationStream,
finalTableName: TableName,
finalTableSuffix: String,
): Sql
fun clearLoadedAt(stream: DestinationStream, rawTableName: TableName): Sql
/** Typically we need to create a soft reset temporary table and clear loaded at values */
fun prepareTablesForSoftReset(
stream: DestinationStream,
tableNames: TableNames,
columnNameMapping: ColumnNameMapping,
): Sql {
val createTempTable =
createFinalTable(
stream,
tableNames.finalTableName!!,
columnNameMapping,
SOFT_RESET_SUFFIX,
replace = true
)
val clearLoadedAt = clearLoadedAt(stream, tableNames.rawTableName!!)
return Sql.concat(createTempTable, clearLoadedAt)
}
}
/**
* We are switching all destinations away from T+D, to use direct-load tables instead. However, some
* destinations will continue to provide a "legacy raw tables" mode, which writes the raw table
* format of T+D, but with the actual T+D disabled.
*
* This SQL generator supports that by simply doing nothing.
*/
object NoopTypingDedupingSqlGenerator : TypingDedupingSqlGenerator {
override fun createFinalTable(
stream: DestinationStream,
tableName: TableName,
columnNameMapping: ColumnNameMapping,
finalTableSuffix: String,
replace: Boolean
) = Sql.empty()
override fun updateFinalTable(
stream: DestinationStream,
tableNames: TableNames,
columnNameMapping: ColumnNameMapping,
finalTableSuffix: String,
maxProcessedTimestamp: Instant?,
useExpensiveSaferCasting: Boolean
) = Sql.empty()
override fun overwriteFinalTable(
stream: DestinationStream,
finalTableName: TableName,
finalTableSuffix: String
) = Sql.empty()
override fun clearLoadedAt(stream: DestinationStream, rawTableName: TableName) = Sql.empty()
}

View File

@@ -1,397 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.TableNames
import io.airbyte.cdk.load.state.StreamProcessingFailed
import io.airbyte.cdk.load.table.ColumnNameMapping
import io.airbyte.cdk.load.table.TableSuffixes.NO_SUFFIX
import io.airbyte.cdk.load.table.TableSuffixes.TMP_TABLE_SUFFIX
import io.airbyte.cdk.load.write.StreamLoader
import io.airbyte.cdk.load.write.StreamStateStore
import io.github.oshai.kotlinlogging.KotlinLogging
import java.time.Instant
private val logger = KotlinLogging.logger {}
class TypingDedupingStreamLoader(
override val stream: DestinationStream,
private val initialStatus: TypingDedupingDatabaseInitialStatus,
private val tableNames: TableNames,
private val columnNameMapping: ColumnNameMapping,
private val rawTableOperations: TypingDedupingRawTableOperations,
private val finalTableOperations: TypingDedupingFinalTableOperations,
private val disableTypeDedupe: Boolean,
private val streamStateStore: StreamStateStore<TypingDedupingExecutionConfig>,
) : StreamLoader {
private val isTruncateSync =
when (stream.minimumGenerationId) {
0L -> false
stream.generationId -> true
else -> {
throw IllegalArgumentException("Hybrid refreshes are not yet supported.")
}
}
private lateinit var rawTableSuffix: String
private lateinit var finalTmpTableSuffix: String
/**
* The status of the raw table that "matters" for this sync. Specifically:
* * For normal syncs / merge refreshes, this is the status of the real raw table.
* * For truncate refreshes, this is the status of the temp raw table (because we never even
* look at the real raw table)
*/
private lateinit var initialRawTableStatus: RawTableInitialStatus
override suspend fun start() {
if (isTruncateSync) {
val (rawTableStatus, suffix) = prepareStageForTruncate()
initialRawTableStatus = rawTableStatus
rawTableSuffix = suffix
} else {
rawTableSuffix = NO_SUFFIX
initialRawTableStatus = prepareStageForNormalSync()
}
if (!disableTypeDedupe) {
// Prepare final tables based on sync mode.
finalTmpTableSuffix = prepareFinalTable()
} else {
logger.info { "Typing and deduping disabled, skipping final table initialization" }
finalTmpTableSuffix = NO_SUFFIX
}
streamStateStore.put(
stream.mappedDescriptor,
TypingDedupingExecutionConfig(rawTableSuffix),
)
}
private fun prepareStageForTruncate(): Pair<RawTableInitialStatus, String> {
/*
tl;dr:
* if a temp raw table exists, check whether it belongs to the correct generation.
* if wrong generation, truncate it.
* regardless, write into the temp raw table.
* else, if a real raw table exists, check its generation.
* if wrong generation, write into a new temp raw table.
* else, write into the preexisting real raw table.
* else, create a new temp raw table and write into it.
*/
if (initialStatus.tempRawTableStatus != null) {
val tempStageGeneration =
rawTableOperations.getRawTableGeneration(
tableNames.rawTableName!!,
TMP_TABLE_SUFFIX
)
if (tempStageGeneration == null || tempStageGeneration == stream.generationId) {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and existing temp raw table belongs to generation $tempStageGeneration (== current generation ${stream.generationId}). Retaining it."
}
// The temp table is from the correct generation. Set up any other resources
// (staging file, etc.), but leave the table untouched.
rawTableOperations.prepareRawTable(
tableNames.rawTableName,
TMP_TABLE_SUFFIX,
)
return Pair(initialStatus.tempRawTableStatus.reify(), TMP_TABLE_SUFFIX)
} else {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and existing temp raw table belongs to generation $tempStageGeneration (!= current generation ${stream.generationId}). Truncating it."
}
// The temp stage is from the wrong generation. Nuke it.
rawTableOperations.prepareRawTable(
tableNames.rawTableName,
TMP_TABLE_SUFFIX,
replace = true,
)
// We nuked the temp raw table, so create a new initial raw table status.
return Pair(
RawTableInitialStatus.emptyTableStatus,
TMP_TABLE_SUFFIX,
)
}
} else if (initialStatus.rawTableStatus != null) {
// It's possible to "resume" a truncate sync that was previously already finalized.
// In this case, there is no existing temp raw table, and there is a real raw table
// which already belongs to the correct generation.
// Check for that case now.
val realStageGeneration =
rawTableOperations.getRawTableGeneration(tableNames.rawTableName!!, NO_SUFFIX)
if (realStageGeneration == null || realStageGeneration == stream.generationId) {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, no existing temp raw table, and existing real raw table belongs to generation $realStageGeneration (== current generation ${stream.generationId}). Retaining it."
}
// The real raw table is from the correct generation. Set up any other resources
// (staging file, etc.), but leave the table untouched.
rawTableOperations.prepareRawTable(tableNames.rawTableName, NO_SUFFIX)
return Pair(initialStatus.rawTableStatus.reify(), NO_SUFFIX)
} else {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, existing real raw table belongs to generation $realStageGeneration (!= current generation ${stream.generationId}), and no preexisting temp raw table. Creating a temp raw table."
}
// We're initiating a new truncate refresh. Create a new temp stage.
rawTableOperations.prepareRawTable(
tableNames.rawTableName,
TMP_TABLE_SUFFIX,
)
return Pair(
// Create a fresh raw table status, since we created a fresh temp stage.
RawTableInitialStatus.emptyTableStatus,
TMP_TABLE_SUFFIX,
)
}
} else {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: truncate sync, and no preexisting temp or raw table. Creating a temp raw table."
}
// We're initiating a new truncate refresh. Create a new temp stage.
rawTableOperations.prepareRawTable(
tableNames.rawTableName!!,
TMP_TABLE_SUFFIX,
)
return Pair(
// Create a fresh raw table status, since we created a fresh temp stage.
RawTableInitialStatus.emptyTableStatus,
TMP_TABLE_SUFFIX,
)
}
}
private fun prepareStageForNormalSync(): RawTableInitialStatus {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync. Creating raw table if not exists."
}
rawTableOperations.prepareRawTable(tableNames.rawTableName!!, NO_SUFFIX)
if (initialStatus.tempRawTableStatus != null) {
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync, but temp raw table exists. Transferring it to real raw table."
}
// There was a previous truncate refresh attempt, which failed, and left some
// records behind.
// Retrieve those records and put them in the real stage.
// This is necessary to avoid certain data loss scenarios.
// (specifically: a user initiates a truncate sync, which fails, but emits some records.
// It also emits a state message for "resumable" full refresh.
// The user then initiates an incremental sync, which runs using that state.
// In this case, we MUST retain the records from the truncate attempt.)
rawTableOperations.transferFromTempRawTable(tableNames.rawTableName, TMP_TABLE_SUFFIX)
// We need to combine the raw table statuses from the real and temp raw tables.
val hasUnprocessedRecords =
initialStatus.tempRawTableStatus.hasUnprocessedRecords ||
(initialStatus.rawTableStatus?.hasUnprocessedRecords ?: false)
// Pick the earlier min timestamp.
val maxProcessedTimestamp: Instant? =
initialStatus.rawTableStatus?.maxProcessedTimestamp?.let { realRawTableTimestamp ->
initialStatus.tempRawTableStatus.maxProcessedTimestamp?.let {
tempRawTableTimestamp ->
if (realRawTableTimestamp.isBefore(tempRawTableTimestamp)) {
realRawTableTimestamp
} else {
tempRawTableTimestamp
}
}
?: realRawTableTimestamp
}
?: initialStatus.tempRawTableStatus.maxProcessedTimestamp
val updatedStatus =
RawTableInitialStatus(
hasUnprocessedRecords = hasUnprocessedRecords,
maxProcessedTimestamp = maxProcessedTimestamp,
)
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: After record transfer, initial raw table status is $updatedStatus."
}
return updatedStatus
} else {
val initialRawTableStatus = initialStatus.rawTableStatus.reify()
logger.info {
"${stream.mappedDescriptor.toPrettyString()}: non-truncate sync and no temp raw table. Initial raw table status is $initialRawTableStatus."
}
return initialRawTableStatus
}
}
private fun prepareFinalTable(): String {
// No special handling if final table doesn't exist, just create and return
if (initialStatus.finalTableStatus == null) {
logger.info {
"Final table does not exist for stream ${stream.mappedDescriptor.toPrettyString()}, creating ${tableNames.finalTableName!!.toPrettyString()}."
}
finalTableOperations.createFinalTable(
stream,
tableNames.finalTableName!!,
columnNameMapping,
NO_SUFFIX,
replace = false
)
return NO_SUFFIX
}
logger.info { "Final Table exists for stream ${stream.mappedDescriptor.toPrettyString()}" }
// The table already exists. Decide whether we're writing to it directly, or
// using a tmp table.
if (isTruncateSync) {
if (
initialStatus.finalTableStatus.isEmpty ||
initialStatus.finalTableStatus.finalTableGenerationId == null
) {
if (!initialStatus.finalTableStatus.isSchemaMismatch) {
logger.info {
"Truncate sync, and final table is empty and has correct schema. Writing to it directly."
}
return NO_SUFFIX
} else {
// No point soft resetting an empty table. We'll just do an overwrite later.
logger.info {
"Truncate sync, and final table is empty, but has the wrong schema. Using a temp final table."
}
return prepareFinalTableForOverwrite()
}
} else if (
initialStatus.finalTableStatus.finalTableGenerationId >= stream.minimumGenerationId
) {
if (!initialStatus.finalTableStatus.isSchemaMismatch) {
logger.info {
"Truncate sync, and final table matches our generation and has correct schema. Writing to it directly."
}
return NO_SUFFIX
} else {
logger.info {
"Truncate sync, and final table matches our generation, but has the wrong schema. Writing to it directly, but triggering a soft reset first."
}
finalTableOperations.softResetFinalTable(stream, tableNames, columnNameMapping)
return NO_SUFFIX
}
} else {
// The final table is in the wrong generation. Use a temp final table.
return prepareFinalTableForOverwrite()
}
} else {
if (initialStatus.finalTableStatus.isSchemaMismatch) {
// We're loading data directly into the existing table.
// Make sure it has the right schema.
// Also, if a raw table migration wants us to do a soft reset, do that
// here.
logger.info {
"Executing soft-reset on final table of stream ${stream.mappedDescriptor}"
}
finalTableOperations.softResetFinalTable(stream, tableNames, columnNameMapping)
}
return NO_SUFFIX
}
}
private fun prepareFinalTableForOverwrite(): String {
if (
initialStatus.finalTableStatus?.isEmpty != true ||
initialStatus.finalTableStatus.isSchemaMismatch
) {
// overwrite an existing tmp table if needed.
finalTableOperations.createFinalTable(
stream,
tableNames.finalTableName!!,
columnNameMapping,
TMP_TABLE_SUFFIX,
replace = true
)
logger.info {
"Using temp final table for table ${stream.mappedDescriptor.toPrettyString()}, this will be overwritten at end of sync"
}
// We want to overwrite an existing table. Write into a tmp table.
// We'll overwrite the table at the end of the sync.
return TMP_TABLE_SUFFIX
}
logger.info {
"Final Table for stream ${stream.mappedDescriptor.toPrettyString()} is empty and matches the expected v2 format, writing to table directly"
}
return NO_SUFFIX
}
override suspend fun close(hadNonzeroRecords: Boolean, streamFailure: StreamProcessingFailed?) {
val streamSuccessful = streamFailure == null
// Overwrite the raw table before doing anything else.
// This ensures that if T+D fails, we can easily retain the records on the next sync.
// It also means we don't need to run T+D using the temp raw table,
// which is possible (`typeAndDedupe(streamConfig.id.copy(rawName = streamConfig.id.rawName +
// suffix))`), but annoying and confusing.
if (isTruncateSync && streamSuccessful && rawTableSuffix.isNotEmpty()) {
logger.info {
"Overwriting raw table for ${stream.mappedDescriptor.toPrettyString()} because this is a truncate sync, we received a stream success message, and are using a temporary raw table."
}
rawTableOperations.overwriteRawTable(tableNames.rawTableName!!, rawTableSuffix)
} else {
logger.info {
"Not overwriting raw table for ${stream.mappedDescriptor.toPrettyString()}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful; raw table suffix: \"$rawTableSuffix\""
}
}
if (disableTypeDedupe) {
logger.info {
"Typing and deduping disabled, skipping final table finalization. Raw records can be found at ${tableNames.rawTableName!!.toPrettyString()}"
}
return
}
// Normal syncs should T+D regardless of status, so the user sees progress after every
// attempt.
// We know this is a normal sync, so initialRawTableStatus is nonnull.
if (!isTruncateSync && !hadNonzeroRecords && !initialRawTableStatus.hasUnprocessedRecords) {
logger.info {
"Skipping typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} because it had no records during this sync and no unprocessed records from a previous sync."
}
} else if (
isTruncateSync &&
(!streamSuccessful ||
(!hadNonzeroRecords && !initialRawTableStatus.hasUnprocessedRecords))
) {
// But truncate syncs should only T+D if the sync was successful, since we're T+Ding
// into a temp final table anyway.
// We only run T+D if the current sync had some records, or a previous attempt wrote
// some records to the temp raw table.
logger.info {
"Skipping typing and deduping for stream ${stream.mappedDescriptor.toPrettyString()} running as truncate sync. Stream success: $streamSuccessful; had nonzero records: $hadNonzeroRecords; temp raw table had records: ${initialRawTableStatus.hasUnprocessedRecords}"
}
} else {
// When targeting the temp final table, we want to read all the raw records
// because the temp final table is always a full rebuild. Typically, this is equivalent
// to filtering on timestamp, but might as well be explicit.
val maxProcessedTimestamp =
if (finalTmpTableSuffix.isEmpty()) {
initialRawTableStatus.maxProcessedTimestamp
} else {
null
}
finalTableOperations.typeAndDedupe(
stream,
tableNames,
columnNameMapping,
maxProcessedTimestamp = maxProcessedTimestamp,
finalTableSuffix = finalTmpTableSuffix
)
}
// We want to run this independently of whether we ran T+D.
// E.g. it's valid for a sync to emit 0 records (e.g. the source table is legitimately
// empty), in which case we want to overwrite the final table with an empty table.
if (isTruncateSync && streamSuccessful && finalTmpTableSuffix.isNotBlank()) {
logger.info {
"Overwriting final table for ${stream.mappedDescriptor.toPrettyString()} because this is a truncate sync, we received a stream success message, and we are using a temp final table.."
}
finalTableOperations.overwriteFinalTable(
stream,
tableNames.finalTableName!!,
finalTableSuffix = finalTmpTableSuffix
)
} else {
logger.info {
"Not overwriting final table for ${stream.mappedDescriptor.toPrettyString()}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful; final table suffix not blank: ${finalTmpTableSuffix.isNotBlank()}"
}
}
}
}

View File

@@ -1,88 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.orchestration.db.DatabaseHandler
import io.airbyte.cdk.load.orchestration.db.DatabaseInitialStatusGatherer
import io.airbyte.cdk.load.write.DestinationWriter
import io.airbyte.cdk.load.write.StreamLoader
import io.airbyte.cdk.load.write.StreamStateStore
import java.util.concurrent.Executors
import kotlinx.coroutines.asCoroutineDispatcher
import kotlinx.coroutines.launch
import kotlinx.coroutines.runBlocking
class TypingDedupingWriter(
private val names: TableCatalog,
private val stateGatherer: DatabaseInitialStatusGatherer<TypingDedupingDatabaseInitialStatus>,
private val databaseHandler: DatabaseHandler,
private val rawTableOperations: TypingDedupingRawTableOperations,
private val finalTableOperations: TypingDedupingFinalTableOperations,
private val disableTypeDedupe: Boolean,
private val streamStateStore: StreamStateStore<TypingDedupingExecutionConfig>,
) : DestinationWriter {
private lateinit var initialStatuses:
Map<DestinationStream, TypingDedupingDatabaseInitialStatus>
override suspend fun setup() {
Executors.newFixedThreadPool(10).asCoroutineDispatcher().use { dispatcher ->
val namespaces =
names.values.map { (tableNames, _) -> tableNames.rawTableName!!.namespace } +
names.values.map { (tableNames, _) -> tableNames.finalTableName!!.namespace }
databaseHandler.createNamespaces(namespaces.toSet())
val initialInitialStatuses:
Map<DestinationStream, TypingDedupingDatabaseInitialStatus> =
stateGatherer.gatherInitialStatus(names)
// TODO migrations - we should probably actually drop all existing migrations as part of
// this project, but eventually we'll need some solution here
// If we have a schema mismatch, then execute a soft reset.
val streamsNeedingSoftReset =
initialInitialStatuses.filter { (_, status) ->
// if the table doesn't exist, then by definition we don't have a schema
// mismatch.
status.finalTableStatus?.isSchemaMismatch ?: false
}
runBlocking(dispatcher) {
streamsNeedingSoftReset.forEach { (stream, _) ->
launch {
val (tableNames, columnNameMapping) = names[stream]!!
finalTableOperations.softResetFinalTable(
stream,
tableNames,
columnNameMapping
)
}
}
}
// Soft reset will modify the initial status of a table.
// Refetch their statuses.
val statusesAfterSoftReset =
stateGatherer.gatherInitialStatus(
TableCatalog(names.filterKeys { streamsNeedingSoftReset.containsKey(it) })
)
// second map "wins" when adding two maps together, so we'll retain the newer statuses.
initialStatuses = initialInitialStatuses + statusesAfterSoftReset
}
}
override fun createStreamLoader(stream: DestinationStream): StreamLoader {
val (tableNames, columnNameMapping) = names[stream]!!
return TypingDedupingStreamLoader(
stream,
initialStatuses[stream]!!,
tableNames,
columnNameMapping,
rawTableOperations,
finalTableOperations,
disableTypeDedupe = disableTypeDedupe,
streamStateStore,
)
}
}

View File

@@ -4,8 +4,8 @@
package io.airbyte.cdk.load.toolkits.load.db.orchestration
import io.airbyte.cdk.load.orchestration.db.DefaultTempTableNameGenerator
import io.airbyte.cdk.load.table.TableName
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.table.DefaultTempTableNameGenerator
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.Test

View File

@@ -1,308 +0,0 @@
/*
* Copyright (c) 2025 Airbyte, Inc., all rights reserved.
*/
package io.airbyte.cdk.load.toolkits.load.db.orchestration
import io.airbyte.cdk.load.command.Append
import io.airbyte.cdk.load.command.DestinationCatalog
import io.airbyte.cdk.load.command.DestinationStream
import io.airbyte.cdk.load.command.NamespaceMapper
import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.FieldType
import io.airbyte.cdk.load.data.ObjectType
import io.airbyte.cdk.load.data.StringType
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.orchestration.db.FinalTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.RawTableNameGenerator
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.DEFAULT_AIRBYTE_INTERNAL_NAMESPACE
import io.airbyte.cdk.load.orchestration.db.legacy_typing_deduping.TableCatalogFactory
import io.airbyte.cdk.load.table.TableName
import org.junit.jupiter.api.Assertions.assertAll
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.assertNull
import org.junit.jupiter.api.Test
class TableCatalogFactoryTest {
@Test
fun testTableNameCollision() {
// Create the same streams as in the original test - "foobarfoo" and "foofoo"
val stream1 = createTestStream("foobarfoo", "a")
val stream2 = createTestStream("foofoo", "a")
// Use SAM syntax with conditional logic in the lambda
val rawTableNameGenerator = RawTableNameGenerator { descriptor ->
TableName(
"airbyte_internal",
"""${descriptor.namespace}_${descriptor.name.replace("bar", "")}""",
)
}
val finalTableNameGenerator = FinalTableNameGenerator { descriptor ->
TableName(
descriptor.namespace!!,
descriptor.name.replace("bar", ""),
)
}
val columnNameGenerator = ColumnNameGenerator { input ->
ColumnNameGenerator.ColumnName(input, input)
}
val catalog = DestinationCatalog(listOf(stream1, stream2))
val tableCatalog =
TableCatalogFactory()
.getTableCatalog(
catalog,
rawTableNameGenerator,
finalTableNameGenerator,
columnNameGenerator
)
// Get the final table names for both streams
val stream1TableInfo = tableCatalog[stream1]!!
val stream2TableInfo = tableCatalog[stream2]!!
assertAll(
{ assertEquals("foofoo", stream1TableInfo.tableNames.finalTableName!!.name) },
{ assertEquals("a", stream1TableInfo.tableNames.finalTableName!!.namespace) },
{ assertEquals("foofoo_3fd", stream2TableInfo.tableNames.finalTableName!!.name) },
{
assertEquals(
"a",
stream2TableInfo.tableNames.finalTableName!!.namespace,
)
}
)
// Now check raw table names with exact expected suffix
assertAll(
{ assertEquals("a_foofoo", stream1TableInfo.tableNames.rawTableName!!.name) },
{
assertEquals(
DEFAULT_AIRBYTE_INTERNAL_NAMESPACE,
stream1TableInfo.tableNames.rawTableName!!.namespace
)
},
{ assertEquals("a_foofoo_3fd", stream2TableInfo.tableNames.rawTableName!!.name) },
{
assertEquals(
DEFAULT_AIRBYTE_INTERNAL_NAMESPACE,
stream2TableInfo.tableNames.rawTableName!!.namespace
)
}
)
}
/**
* Test two streams which don't collide in their final tables, and with no raw tables.
*
* We should leave both streams unchanged.
*/
@Test
fun testTableNameNoCollisionWithNoRawTableGenerator() {
val stream1 = createTestStream("foo", "a")
val stream2 = createTestStream("bar", "a")
val finalTableNameGenerator = FinalTableNameGenerator { descriptor ->
TableName(descriptor.namespace!!, descriptor.name)
}
val columnNameGenerator = ColumnNameGenerator { input ->
ColumnNameGenerator.ColumnName(input, input)
}
val catalog = DestinationCatalog(listOf(stream1, stream2))
val tableCatalog =
TableCatalogFactory()
.getTableCatalog(
catalog,
rawTableNameGenerator = null,
finalTableNameGenerator,
columnNameGenerator
)
// Get the final table names for both streams
val stream1TableInfo = tableCatalog[stream1]!!
val stream2TableInfo = tableCatalog[stream2]!!
assertAll(
{ assertEquals("foo", stream1TableInfo.tableNames.finalTableName!!.name) },
{ assertEquals("a", stream1TableInfo.tableNames.finalTableName!!.namespace) },
{ assertEquals("bar", stream2TableInfo.tableNames.finalTableName!!.name) },
{
assertEquals(
"a",
stream2TableInfo.tableNames.finalTableName!!.namespace,
)
}
)
// Now check raw table names are null
assertAll(
{ assertNull(stream1TableInfo.tableNames.rawTableName) },
{ assertNull(stream2TableInfo.tableNames.rawTableName) },
)
}
@Test
fun testTruncatingColumnNameCollision() {
val schema =
ObjectType(
linkedMapOf(
"aVeryLongColumnName" to FieldType(StringType, true),
"aVeryLongColumnNameWithMoreTextAfterward" to FieldType(StringType, true),
)
)
val stream = createTestStream("stream", "namespace", schema)
val catalog = DestinationCatalog(listOf(stream))
val rawTableNameGenerator = RawTableNameGenerator { _ ->
TableName("raw_dataset", "raw_stream")
}
val finalTableNameGenerator = FinalTableNameGenerator { _ ->
TableName("final_dataset", "final_stream")
}
val columnNameGenerator = ColumnNameGenerator { input ->
val truncated = input.substring(0, 10.coerceAtMost(input.length))
ColumnNameGenerator.ColumnName(truncated, truncated)
}
val tableCatalog =
TableCatalogFactory()
.getTableCatalog(
catalog,
rawTableNameGenerator,
finalTableNameGenerator,
columnNameGenerator
)
val columnMapping = tableCatalog[stream]!!.columnNameMapping
val mappedNames =
listOf(
columnMapping["aVeryLongColumnName"]!!,
columnMapping["aVeryLongColumnNameWithMoreTextAfterward"]!!
)
assertEquals(2, mappedNames.size)
assertEquals("aVeryLongC", mappedNames[0])
assertEquals("aV36rd", mappedNames[1])
}
@Test
fun testColumnNameCollision() {
        // Create a schema with columns whose names will collide after processing
val schema =
ObjectType(
linkedMapOf(
"foobarfoo" to FieldType(StringType, true),
"foofoo" to FieldType(StringType, true),
)
)
val stream = createTestStream("stream", "namespace", schema)
val catalog = DestinationCatalog(listOf(stream))
val rawTableNameGenerator = RawTableNameGenerator { _ ->
TableName("raw_dataset", "raw_stream")
}
val finalTableNameGenerator = FinalTableNameGenerator { _ ->
TableName("final_dataset", "final_stream")
}
// Simulate name collision by removing "bar"
val columnNameGenerator = ColumnNameGenerator { input ->
val processedName = input.replace("bar", "")
ColumnNameGenerator.ColumnName(processedName, processedName)
}
val tableCatalog =
TableCatalogFactory()
.getTableCatalog(
catalog,
rawTableNameGenerator,
finalTableNameGenerator,
columnNameGenerator
)
val columnMapping = tableCatalog[stream]!!.columnNameMapping
val mappedColumns = listOf(columnMapping["foobarfoo"]!!, columnMapping["foofoo"]!!)
// Verify column name collision was properly resolved
// One column should be "foofoo" and the other should be "foofoo_1"
assertAll(
{ assertEquals(2, mappedColumns.size) },
{ assertEquals("foofoo", mappedColumns[0]) },
{ assertEquals("foofoo_1", mappedColumns[1]) }
)
}
@Test
fun testColumnNameCollisionRelyingOnCanonicalName() {
val schema =
ObjectType(
linkedMapOf(
"FOO" to FieldType(StringType, true),
"foo" to FieldType(StringType, true),
)
)
val stream = createTestStream("stream", "namespace", schema)
val catalog = DestinationCatalog(listOf(stream))
val rawTableNameGenerator = RawTableNameGenerator { _ ->
TableName("raw_dataset", "raw_stream")
}
val finalTableNameGenerator = FinalTableNameGenerator { _ ->
TableName("final_dataset", "final_stream")
}
// Simulate name collision by downcasing, while retaining the original name
// as the display name
val columnNameGenerator = ColumnNameGenerator { input ->
ColumnNameGenerator.ColumnName(
displayName = input,
canonicalName = input.lowercase(),
)
}
val tableCatalog =
TableCatalogFactory()
.getTableCatalog(
catalog,
rawTableNameGenerator,
finalTableNameGenerator,
columnNameGenerator,
)
val columnMapping = tableCatalog[stream]!!.columnNameMapping
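        // Collisions are detected on the canonical (lowercased) names, so "foo" gets the "_1"
        // suffix while both display names keep their original casing.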
assertEquals(
mapOf(
"FOO" to "FOO",
"foo" to "foo_1",
),
columnMapping,
)
}
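
    /** Builds a minimal Append-mode test stream with the given name, namespace, and schema. */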
private fun createTestStream(
name: String,
namespace: String,
schema: AirbyteType = ObjectType(linkedMapOf())
): DestinationStream {
return DestinationStream(
unmappedNamespace = namespace,
unmappedName = name,
importType = Append,
schema = schema,
generationId = 1L,
minimumGenerationId = 0L,
syncId = 0L,
namespaceMapper = NamespaceMapper()
)
}
}

View File

@@ -6,7 +6,7 @@ package io.airbyte.cdk.load.toolkits.load.db.orchestration
import io.airbyte.cdk.load.data.AirbyteType
import io.airbyte.cdk.load.data.ObjectValue
import io.airbyte.cdk.load.orchestration.db.ColumnNameGenerator
import io.airbyte.cdk.load.table.ColumnNameGenerator
import io.airbyte.cdk.load.test.util.ExpectedRecordMapper
import io.airbyte.cdk.load.test.util.OutputRecord

View File

@@ -15,6 +15,10 @@ import io.airbyte.cdk.load.message.CheckpointMessage
import io.airbyte.cdk.load.message.InputRecord
import io.airbyte.cdk.load.message.InputStreamCheckpoint
import io.airbyte.cdk.load.message.StreamCheckpoint
import io.airbyte.cdk.load.schema.model.ColumnSchema
import io.airbyte.cdk.load.schema.model.StreamTableSchema
import io.airbyte.cdk.load.schema.model.TableName
import io.airbyte.cdk.load.schema.model.TableNames
import io.airbyte.cdk.load.test.mock.MockDestinationDataDumper
import io.airbyte.cdk.load.test.util.IntegrationTest
import io.airbyte.cdk.load.test.util.NoopDestinationCleaner
@@ -55,6 +59,17 @@ open class AbstractDlqWriteTest(
minimumGenerationId = 0,
syncId = 42,
namespaceMapper = NamespaceMapper(),
tableSchema =
StreamTableSchema(
columnSchema =
ColumnSchema(
inputSchema = mapOf(),
inputToFinalColumnNames = mapOf(),
finalSchema = mapOf(),
),
importType = Append,
tableNames = TableNames(finalTableName = TableName("namespace", "test")),
),
)
val messages =
runSync(

Some files were not shown because too many files have changed in this diff.