From 6dbd718fed5ab725066a7b49ea02d98dd18567d7 Mon Sep 17 00:00:00 2001
From: Edward Gao
Date: Tue, 7 Oct 2025 15:15:36 -0700
Subject: [PATCH] Bulk CDK: add GHA cron to bump CDK version in certified connectors (#66826)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## What

Part of https://github.com/airbytehq/airbyte-internal-issues/issues/14308. Example run https://github.com/airbytehq/airbyte/actions/runs/18284850032, example PR https://github.com/airbytehq/airbyte/pull/67093/.

See also https://github.com/airbytehq/airbyte-enterprise/pull/257.

The cron runs on the first day of every month (`37 16 1 * *`, i.e. 16:37 UTC). Past discussions wanted every 3 weeks, but that's a PITA to do in GHA.

I added one more commit after that run: https://github.com/airbytehq/airbyte/commit/679081a47d44b5f6a3c75f1f9da2c729635fd05d. Just changing the trigger to be on a cron, and marking the PR as ready for review.

There's no automation on actually merging the PRs, but they'll tag the appropriate team(s) for review. In principle, reviewers can just click the merge button on green CI though.

There's some weird behavior where the PR sometimes fails to submit b/c of push protection rules ([example](https://github.com/airbytehq/airbyte/actions/runs/18235269794/job/51927690655#step:12:119)). @wennergr is looking into this.

## How

Use the various gradle tasks we've added to upgrade the CDK version.

I wanted to use the [bump-version command](https://github.com/airbytehq/airbyte/actions/workflows/bump-version-command.yml) to do the metadata+changelog, but it seems to be [broken](https://airbytehq-team.slack.com/archives/C02U9R3AF37/p1759499392004199). Got claude to write a hacky equivalent.

(a lot of the bash stuff is also from claude, but heavily edited for readability+comments+style. I've done enough testing to be confident in it though.)

## Can this PR be safely reverted and rolled back?

- [x] YES 💚
- [ ] NO ❌
---
 .../auto-upgrade-certified-connectors-cdk.yml | 166 ++++++++++++++++++
 .../bump-connector-metadata.sh                |  54 ++++++
 .../list-connectors-to-upgrade.sh             |  80 +++++++++
 .../populate-connector-changelog.sh           |  63 +++++++
 4 files changed, 363 insertions(+)
 create mode 100644 .github/workflows/auto-upgrade-certified-connectors-cdk.yml
 create mode 100755 tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh
 create mode 100755 tools/bin/bulk-cdk-auto-upgrade/list-connectors-to-upgrade.sh
 create mode 100755 tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh

diff --git a/.github/workflows/auto-upgrade-certified-connectors-cdk.yml b/.github/workflows/auto-upgrade-certified-connectors-cdk.yml
new file mode 100644
index 00000000000..2ea9a755083
--- /dev/null
+++ b/.github/workflows/auto-upgrade-certified-connectors-cdk.yml
@@ -0,0 +1,166 @@
+name: Auto Upgrade CDK for Certified Connectors
+on:
+  schedule:
+    # Run at 16:37 UTC on the first of every month
+    - cron: "37 16 1 * *"
+  workflow_dispatch:
+  workflow_call:
+    inputs:
+      repositories:
+        description: "Repository name for GitHub App authentication"
+        required: false
+        type: string
+        default: "airbyte"
+
+jobs:
+  list-certified-connectors:
+    name: List Certified Connectors
+    runs-on: ubuntu-24.04
+    outputs:
+      connectors: ${{ steps.list-connectors.outputs.connectors }}
+    steps:
+      - name: Checkout Airbyte
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
+        with:
+          # Needed for when airbyte-enterprise calls this workflow
+          submodules: true
+
+      - name: Install yq
+        run: sudo snap install yq
+
+      - name: List certified connectors
+        id: list-connectors
+        run: |
+          json_array=$(tools/bin/bulk-cdk-auto-upgrade/list-connectors-to-upgrade.sh)
+          echo "connectors=$json_array" >> "$GITHUB_OUTPUT"
+          echo "Found bulk CDK connectors to upgrade: $json_array"
+
+  upgrade-connector-cdk:
+    name: Upgrade CDK for ${{ matrix.connector }}
+    needs: list-certified-connectors
+    # An empty matrix is a workflow error, so skip this job when there's nothing to upgrade.
+    if: needs.list-certified-connectors.outputs.connectors != '[]'
+    runs-on: ubuntu-24.04
+    strategy:
+      matrix:
+        connector: ${{ fromJson(needs.list-certified-connectors.outputs.connectors) }}
+      fail-fast: false
+      max-parallel: 5
+    steps:
+      - name: Authenticate as GitHub App
+        uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2
+        id: app-token
+        with:
+          owner: "airbytehq"
+          repositories: ${{ inputs.repositories || 'airbyte' }}
+          app-id: ${{ secrets.OCTAVIA_BOT_APP_ID }}
+          private-key: ${{ secrets.OCTAVIA_BOT_PRIVATE_KEY }}
+      - name: Get GitHub App User ID
+        id: get-user-id
+        run: echo "user-id=$(gh api "/users/${{ steps.app-token.outputs.app-slug }}[bot]" --jq .id)" >> "$GITHUB_OUTPUT"
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token }}
+      - run: |
+          git config --global user.name '${{ steps.app-token.outputs.app-slug }}[bot]'
+          git config --global user.email '${{ steps.get-user-id.outputs.user-id }}+${{ steps.app-token.outputs.app-slug }}[bot]@users.noreply.github.com'
+
+      - name: Checkout Airbyte
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          # Needed for when airbyte-enterprise calls this workflow
+          submodules: true
+
+      - name: Install yq
+        run: sudo snap install yq
+
+      - name: Setup Java
+        uses: actions/setup-java@c5195efecf7bdfc987ee8bae7a71cb8b11521c00 # v4
+        with:
+          distribution: "zulu"
+          java-version: "21"
+
+      - name: Setup Gradle
+        uses: gradle/actions/setup-gradle@80e941e61874822d2a89974089c4915748e8f4b7 # v4
+
+      - name: Run upgradeCdk for ${{ matrix.connector }}
+        id: upgrade-cdk
+        run: |
+          set -euo pipefail
+          ./gradlew :airbyte-integrations:connectors:${{ matrix.connector }}:upgradeCdk
+          # --quiet disables all of gradle's normal logging, so we only get the CDK version number
+          new_cdk_version=$(./gradlew :airbyte-integrations:connectors:${{ matrix.connector }}:getCdkVersion --quiet)
+          echo "new_cdk_version=$new_cdk_version" >> "$GITHUB_OUTPUT"
+
+      - name: Check for changes
+        id: check-changes
+        # The upgrade step doesn't publish an `exit_code` output, so check the step's outcome instead.
+        if: steps.upgrade-cdk.outcome == 'success'
+        run: |
+          if git diff --quiet; then
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+            echo "No changes detected for ${{ matrix.connector }}"
+          else
+            echo "has_changes=true" >> "$GITHUB_OUTPUT"
+            echo "Changes detected for ${{ matrix.connector }}"
+          fi
+
+      - name: Bump connector version
+        id: bump-version
+        if: steps.check-changes.outputs.has_changes == 'true'
+        run: |
+          set -euo pipefail
+          new_version=$(tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh "${{ matrix.connector }}")
+          echo "Updated dockerImageTag to $new_version"
+          echo "new_version=$new_version" >> "$GITHUB_OUTPUT"
+
+      - name: Create Pull Request
+        id: create-pr
+        if: steps.check-changes.outputs.has_changes == 'true'
+        uses: peter-evans/create-pull-request@c5a7806660adbe173f04e3e038b0ccdcd758773c # v6
+        with:
+          token: ${{ steps.app-token.outputs.token }}
+          commit-message: "chore: upgrade bulk CDK for ${{ matrix.connector }}"
+          branch: "auto-upgrade-jvm-bulk-cdk/${{ steps.upgrade-cdk.outputs.new_cdk_version }}/${{ matrix.connector }}"
+          delete-branch: true
+          title: "chore: upgrade ${{ matrix.connector }} to bulk CDK ${{ steps.upgrade-cdk.outputs.new_cdk_version }}"
+          body: |
+            Upgrade Bulk CDK version for `${{ matrix.connector }}`
+
+            🤖 Generated by [automated workflow](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+          labels: |
+            area/connectors
+            auto-cdk-upgrade
+          assignees: ""
+          # We'll rely on CODEOWNERS to tag the right people.
+          reviewers: ""
+          # Submit as draft: true, because we need to push another commit to this branch to update the changelog.
+          # The `ready for review` step is where we switch the PR out of draft mode.
+          draft: true
+
+      # Update the connector changelog and push the commit to the new branch
+      - name: Update connector changelog
+        # airbyte-enterprise doesn't have changelogs, so only do this in the OSS repo.
+        if: steps.check-changes.outputs.has_changes == 'true' && inputs.repositories != 'airbyte-enterprise'
+        run: |
+          set -euo pipefail
+          tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh \
+            "${{ matrix.connector }}" \
+            "${{ steps.bump-version.outputs.new_version }}" \
+            "${{ steps.create-pr.outputs.pull-request-number }}" \
+            "Upgrade to Bulk CDK ${{ steps.upgrade-cdk.outputs.new_cdk_version }}."
+
+          # Commit and push the changelog changes
+          branch_name="${{ steps.create-pr.outputs.pull-request-branch }}"
+          git fetch --depth=1 origin "$branch_name"
+          git checkout "$branch_name"
+          git add docs
+          git commit -m "update changelog for ${{ matrix.connector }}"
+          git push origin "$branch_name"
+
+      # We've pushed all the changes to the PR. Mark it ready for review.
+      - name: Ready for review
+        if: steps.check-changes.outputs.has_changes == 'true'
+        run: gh pr ready "${{ steps.create-pr.outputs.pull-request-number }}"
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token }}
diff --git a/tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh b/tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh
new file mode 100755
index 00000000000..5207e314660
--- /dev/null
+++ b/tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Performs a patch version bump in the metadata.yaml of the given connector.
+# Prints the new version number (and nothing else) on stdout; diagnostics go to stderr.
+# Fails if the current dockerImageTag is not a plain X.Y.Z version, or if metadata.yaml could not be updated.
+# Usage: tools/bin/bulk-cdk-auto-upgrade/bump-connector-metadata.sh destination-dev-null
+
+connector_name="${1:?Usage: $0 <connector-name>}"
+
+metadata_file="airbyte-integrations/connectors/$connector_name/metadata.yaml"
+
+if ! test -f "$metadata_file"; then
+  echo "Metadata file not found at $metadata_file." >&2
+  exit 1
+fi
+
+# Extract current version
+current_version=$(yq '.data.dockerImageTag' "$metadata_file")
+
+# Refuse to guess at anything that isn't a plain X.Y.Z version (e.g. an -rc suffix).
+if ! [[ "$current_version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+  echo "Unsupported dockerImageTag '$current_version' in $metadata_file (expected X.Y.Z)." >&2
+  exit 1
+fi
+
+# Parse version components
+IFS='.' read -r major minor patch <<< "$current_version"
+
+# Increment patch version. Force base 10 so a zero-padded component (e.g. `08`) isn't parsed as octal.
+new_patch=$((10#$patch + 1))
+new_version="${major}.${minor}.${new_patch}"
+
+# Update metadata.yaml
+# Don't use `yq` for this. yq introduces unrelated diffs and does not preserve formatting.
+# In particular, yq may introduce diffs that cause our formatter to complain.
+# Don't use `-i` b/c it's not platform-agnostic (macos requires `-i ''`, but that doesn't work on linux)
+# Escape the dots so sed matches them literally instead of as "any character".
+escaped_version="${current_version//./\\.}"
+# Clean up the temp file on every exit path. (After a successful `mv` this is a no-op.)
+tmp_file="$metadata_file.tmp"
+trap 'rm -f -- "$tmp_file"' EXIT
+sed "s/dockerImageTag: ${escaped_version}/dockerImageTag: ${new_version}/" "$metadata_file" > "$tmp_file"
+
+# The sed pattern expects an unquoted tag, so e.g. `dockerImageTag: "1.2.3"` won't match. Don't report a bump that didn't happen.
+if cmp -s "$metadata_file" "$tmp_file"; then
+  echo "Error: sed made no edits to $metadata_file. dockerImageTag may be quoted or formatted unexpectedly." >&2
+  exit 1
+fi
+
+mv "$tmp_file" "$metadata_file"
+
+echo "$new_version"
diff --git a/tools/bin/bulk-cdk-auto-upgrade/list-connectors-to-upgrade.sh b/tools/bin/bulk-cdk-auto-upgrade/list-connectors-to-upgrade.sh
new file mode 100755
index 00000000000..e2b6abaf984
--- /dev/null
+++ b/tools/bin/bulk-cdk-auto-upgrade/list-connectors-to-upgrade.sh
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# List all certified connectors, which are using the bulk CDK, and don't have a weird version number (e.g. -rc suffix).
+# And also source-datagen + destination-dev-null.
+# Prints the result as a JSON array (`[]` if nothing qualifies). Each connector is listed at most once.
+
+connectors=()
+
+# Succeeds if the given metadata.yaml is on a "normal" version (e.g. 12.34.56)
+has_normal_version() {
+  local version
+  version=$(yq '.data.dockerImageTag' "$1")
+  [[ "$version" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]
+}
+
+# Succeeds if the given metadata.yaml says the connector is certified
+is_certified() {
+  local support_level
+  support_level=$(yq '.data.supportLevel' "$1")
+  [[ "$support_level" == 'certified' ]]
+}
+
+# Succeeds if the given connector directory has a gradle buildscript using the bulk connector plugin
+uses_bulk_cdk() {
+  local buildscript
+  for buildscript in "$1/build.gradle" "$1/build.gradle.kts"; do
+    if test -f "$buildscript" && grep -q "airbyte-bulk-connector" "$buildscript"; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Appends the given connector to `connectors`, unless it's already there.
+# Duplicate matrix entries would make two jobs fight over the same PR branch.
+add_connector() {
+  local existing
+  # The `${arr[@]+...}` form keeps `set -u` happy on older bash when the array is still empty.
+  for existing in ${connectors[@]+"${connectors[@]}"}; do
+    if [[ "$existing" == "$1" ]]; then
+      return 0
+    fi
+  done
+  connectors+=("$1")
+}
+
+# datagen and dev-null aren't certified, but we should probably keep them on the latest CDK anyway
+default_connectors=(destination-dev-null source-datagen)
+# check whether these connectors exist. This is to support airbyte-enterprise.
+for connector in "${default_connectors[@]}"; do
+  metadata_file="airbyte-integrations/connectors/${connector}/metadata.yaml"
+  if test -f "$metadata_file" && has_normal_version "$metadata_file"; then
+    add_connector "$connector"
+  fi
+done
+
+for dir in airbyte-integrations/connectors/*; do
+  metadata_file="${dir}/metadata.yaml"
+  if test -f "$metadata_file" \
+    && is_certified "$metadata_file" \
+    && uses_bulk_cdk "$dir" \
+    && has_normal_version "$metadata_file"; then
+    add_connector "$(basename "$dir")"
+  fi
+done
+
+if [[ ${#connectors[@]} -eq 0 ]]; then
+  # `printf '%s\n'` with no arguments still prints one empty line, which the jq pipeline below would turn into `[""]`.
+  echo '[]'
+  exit 0
+fi
+
+# Nonobvious `printf | jq | jq` thing here:
+# Print each element of the array on a separate line
+# -> jq converts each line to a JSON string (i.e. wrap in double quotes)
+# -> jq reads those lines and wraps them into a JSON array (compact-output is needed for compatibility with github's output format)
+json_array=$(printf '%s\n' "${connectors[@]}" | jq --raw-input . | jq --compact-output --slurp .)
+echo "$json_array"
diff --git a/tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh b/tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh
new file mode 100755
index 00000000000..95fbc0ce2d8
--- /dev/null
+++ b/tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh
@@ -0,0 +1,63 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Adds a new changelog entry for the given connector.
+# Usage: tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh <connector-id> <new-version> <pr-number> <changelog-text>
+# E.g.: tools/bin/bulk-cdk-auto-upgrade/populate-connector-changelog.sh destination-dev-null 1.2.3 1234 'upgrade foo'
+
+if [[ $# -lt 4 ]]; then
+  echo "Usage: $0 <connector-id> <new-version> <pr-number> <changelog-text>" >&2
+  exit 1
+fi
+
+connector_id="$1"
+new_version="$2"
+pr_number="$3"
+changelog_text="$4"
+
+# Convert connector name to docs path:
+#   destination-dev-null -> docs/integrations/destinations/dev-null.md
+#   source-postgres -> docs/integrations/sources/postgres.md
+connector_type=$(echo "${connector_id}" | cut -d'-' -f1)
+connector_name=$(echo "${connector_id}" | cut -d'-' -f2-)
+docs_file="docs/integrations/${connector_type}s/${connector_name}.md"
+
+if ! test -f "$docs_file"; then
+  echo "Docs file not found at $docs_file. This connector is probably doing something weird." >&2
+  exit 1
+fi
+
+# Never leave a stray .tmp file in the docs tree, whichever way we exit. (After a successful `mv` this is a no-op.)
+tmp_file="${docs_file}.tmp"
+trap 'rm -f -- "$tmp_file"' EXIT
+
+# YYYY-MM-DD format
+today=$(date +%Y-%m-%d)
+
+new_entry="| ${new_version} | ${today} | [${pr_number}](https://github.com/airbytehq/airbyte/pull/${pr_number}) | ${changelog_text} |"
+
+# Find the changelog table and insert the new entry after the header row
+# The changelog table starts with a header row (`| Version | Date | Pull Request | Subject |`),
+# followed by a separator line.
+# We want to insert the new entry after the separator line.
+# The awk script is doing exactly that:
+# When it sees a line matching the changelog header row, awk prints that line and sets the `header` flag.
+# If the `header` flag is set and awk sees the separator line, awk prints the separator followed by the new changelog entry.
+# Otherwise, awk just prints the line unchanged.
+# The entry is handed to awk through the environment rather than `-v`, because `-v` would interpret backslash escapes in it.
+# The separator pattern accepts any markdown delimiter row: optional spaces around each cell, optional `:` on either side.
+CHANGELOG_ENTRY="$new_entry" awk '
+  /^\| Version *\| Date *\| Pull Request *\| Subject *\|$/ { print; header=1; next }
+  header && /^\|( *:?-+:? *\|)+ *$/ { print; print ENVIRON["CHANGELOG_ENTRY"]; header=0; next }
+  { print }
+' "$docs_file" > "$tmp_file"
+
+if cmp -s "$docs_file" "$tmp_file"; then
+  echo "Error: awk command made no edits to $docs_file. Changelog table may not exist or has unexpected format." >&2
+  exit 1
+fi
+
+mv "$tmp_file" "$docs_file"
+
+echo "Added changelog entry to $docs_file"