From 495d7a318e85a02fbbf6543d7dfc41d87d037f78 Mon Sep 17 00:00:00 2001 From: "Aaron (\"AJ\") Steers" Date: Wed, 27 Aug 2025 20:05:56 -0700 Subject: [PATCH] ci: make `use-cdk-*` poe tasks extras-aware, add prerelease CI check for non-prod CDK versions (#62525) Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- .github/workflows/connector-ci-checks.yml | 19 +++ poe-tasks/detect-python-cdk.py | 167 ++++++++++++++++++++++ poe-tasks/poetry-connector-tasks.toml | 60 +++++++- 3 files changed, 239 insertions(+), 7 deletions(-) create mode 100755 poe-tasks/detect-python-cdk.py diff --git a/.github/workflows/connector-ci-checks.yml b/.github/workflows/connector-ci-checks.yml index 12f60ee7d27..994f726cce9 100644 --- a/.github/workflows/connector-ci-checks.yml +++ b/.github/workflows/connector-ci-checks.yml @@ -430,6 +430,13 @@ jobs: - name: Install uv if: matrix.connector uses: astral-sh/setup-uv@v6 + + - name: Install Poe + if: matrix.connector + run: | + # Install Poe so we can run the connector tasks: + uv tool install poethepoet + - name: Install QA Checks if: matrix.connector run: | @@ -441,6 +448,18 @@ jobs: connector_name=${{ matrix.connector }} connectors-qa run --name ${connector_name%-strict-encrypt} + - name: Detect Python CDK Prerelease Versions + if: matrix.connector + working-directory: airbyte-integrations/connectors/${{ matrix.connector }} + run: | + # Exit with code 1 if the CDK is not pinned to a standard version. + # This is a no-op for non-Python connectors. + if [[ $(poe -qq get-language) == "python" ]]; then + poe detect-cdk-prerelease + else + echo "Skipping CDK pre-release check for non-Python connector." 
+ fi + connector-ci-checks-summary: name: Aggregate Results if: always() diff --git a/poe-tasks/detect-python-cdk.py b/poe-tasks/detect-python-cdk.py new file mode 100755 index 00000000000..3125d14939f --- /dev/null +++ b/poe-tasks/detect-python-cdk.py @@ -0,0 +1,167 @@ +#!/usr/bin/env -S uv run --script +# Copyright (c) 2025 Airbyte, Inc., all rights reserved. +# +# /// script +# requires-python = ">=3.10" +# dependencies = ["tomli"] +# /// + +""" +Detect and analyze airbyte-cdk dependency information from pyproject.toml files. + +This script provides multiple modes for analyzing CDK dependencies: +- JSON output with complete dependency information +- Extras-only output for poetry add commands +- Version pin verification for production readiness + +The script uses uv's automatic virtual environment management to handle dependencies. +For more information about uv script execution, see: +https://docs.astral.sh/uv/guides/scripts/#using-a-shebang-to-create-an-executable-file + +For details about PEP 723 inline script metadata format, see: +https://peps.python.org/pep-0723/#how-to-teach-this + +Usage: + ./detect-python-cdk.py [directory] + Return JSON string with complete CDK dependency information + + ./detect-python-cdk.py --extras-only [directory] + Return string for use in: poetry add "airbyte-cdk$OUTPUT@version" + Output examples: "" (no extras), "[sql]", "[sql,vector-db-based]" + + ./detect-python-cdk.py --detect-prerelease [directory] + Exit 0 if CDK pinned to standard version, exit 1 if git/local/non-standard ref + Provides guidance for resolving non-production references + +Examples: + ./detect-python-cdk.py /path/to/destination-motherduck + {"version": "^6.0.0", "extras": ["sql"], "dependency_type": "version", "is_prerelease": false} + + ./detect-python-cdk.py --extras-only /path/to/destination-motherduck + [sql] + + ./detect-python-cdk.py --detect-prerelease /path/to/destination-motherduck + ✅ Production ready CDK version: ^6.0.0 +""" +
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any


try:
    # Prefer the standard-library parser (Python 3.11+).
    import tomllib
except ModuleNotFoundError:
    # Python 3.10: fall back to the API-compatible `tomli` backport.
    import tomli as tomllib


# Keys that identify how a Poetry dependency is sourced, in lookup priority order.
_DEPENDENCY_TYPES = ("version", "git", "path", "url")

# Separators between the individual constraints of a compound specifier
# such as ">=6.0.0,<7.0.0" or "^5 || ^6".
_CONSTRAINT_SEPARATOR_RE = re.compile(r",|\|\|")

# One optional leading comparison operator (Poetry caret/tilde or PEP 440 style).
_OPERATOR_RE = re.compile(r"^(?:\^|~=|~|===|==|!=|>=|<=|>|<)?\s*")

# A final release: dotted numbers, an optional trailing wildcard and an optional
# post-release marker. Pre-release/dev/local suffixes (rc1, .dev1, +local) do not match.
_RELEASE_RE = re.compile(r"^v?\d+(?:\.\d+)*(?:\.\*)?(?:[-_.]?post[-_.]?\d*)?$", re.IGNORECASE)


def parse_cdk_dependency(pyproject_path) -> dict:
    """Parse the airbyte-cdk dependency from a pyproject.toml file.

    Only the Poetry dependency table (`[tool.poetry.dependencies]`) is inspected.
    A bare version string such as `airbyte-cdk = "^6.0.0"` is normalized to
    `{"version": "^6.0.0"}` before further processing.

    Args:
        pyproject_path: Path (str or Path) of the pyproject.toml file to read.

    Returns:
        dict: On success, a copy of the dependency table plus:
            - "dependency_type": "version", "git", "path", "url" or "unknown".
            - "is_prerelease": only present for "version" dependencies.
        On failure, a dict with a single "error" key describing the problem.
    """
    try:
        with open(pyproject_path, "rb") as f:
            data = tomllib.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: TOMLDecodeError or bad UTF-8.
        return {"error": f"Error reading pyproject.toml: {e}"}

    dependencies = data.get("tool", {}).get("poetry", {}).get("dependencies", {})
    cdk_dep = dependencies.get("airbyte-cdk")

    if not cdk_dep:
        return {"error": "No airbyte-cdk dependency found"}

    if isinstance(cdk_dep, str):
        # Normalize concise version syntax like `airbyte-cdk = "^6.0.0"`
        cdk_dep = {"version": cdk_dep}

    if not isinstance(cdk_dep, dict):
        # E.g. Poetry's multiple-constraints form (a list of tables); previously
        # this crashed with a TypeError when the result was indexed by key.
        return {"error": f"Unsupported airbyte-cdk dependency format: {type(cdk_dep).__name__}"}

    result: dict[str, Any] = dict(cdk_dep)
    dependency_type = next((key for key in _DEPENDENCY_TYPES if key in result), "unknown")
    result["dependency_type"] = dependency_type
    if dependency_type == "version":
        result["is_prerelease"] = is_prerelease_version(result["version"])

    return result


def _is_release_constraint(constraint: str) -> bool:
    """Return True if a single constraint (e.g. "^6.0.0") targets a final release."""
    bare_version = _OPERATOR_RE.sub("", constraint, count=1).strip()
    return bare_version == "*" or bool(_RELEASE_RE.match(bare_version))


def is_prerelease_version(version_str) -> bool:
    """Return True if the version spec is NOT a standard published release.

    A spec is considered production-ready (returns False) only when every
    comma- or `||`-separated constraint is an optional operator followed by a
    final release number, e.g. "^6.0.0", "^6", "6.*", ">=6.0.0,<7.0.0" or
    "6.0.0.post1".

    Empty/missing specs, non-string values, pre-release or dev versions
    ("6.0.0rc1", "6.0.0.dev1"), local versions and anything unparsable
    return True.
    """
    if not version_str or not isinstance(version_str, str):
        return True

    constraints = [part.strip() for part in _CONSTRAINT_SEPARATOR_RE.split(version_str.strip())]
    return not all(part and _is_release_constraint(part) for part in constraints)


def format_extras_for_poetry(extras) -> str:
    """Format an extras list for use in a `poetry add` command.

    E.g. if extras is ['sql', 'vector-db-based'], return "[sql,vector-db-based]".
    An empty or missing value yields "". A single bare string is treated as
    one extra rather than being split into characters.
    """
    if not extras:
        return ""

    if isinstance(extras, str):
        extras = [extras]

    return f"[{','.join(extras)}]"


def main() -> None:
    """Command-line entry point.

    Modes (mutually exclusive):
        (default)            Print the dependency information as JSON on stdout.
        --extras-only        Print only the extras string (e.g. "[sql]") on stdout.
        --detect-prerelease  Print the JSON on stdout, then a verdict on stderr;
                             exit 1 unless the CDK is pinned to a standard version.

    A missing pyproject.toml prints nothing in --extras-only mode, exits 1 in
    --detect-prerelease mode, and prints an error JSON (exit 0) otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Detect and analyze airbyte-cdk dependency information",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("directory", nargs="?", default=".", help="Directory containing pyproject.toml (default: current directory)")

    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument("--extras-only", action="store_true", help="Return extras string for poetry add command")
    mode_group.add_argument("--detect-prerelease", action="store_true", help="Verify CDK is pinned to standard version (exit 1 if not)")

    args = parser.parse_args()

    connector_dir = Path(args.directory)
    pyproject_path = connector_dir / "pyproject.toml"

    if not pyproject_path.exists():
        if args.extras_only:
            # Nothing to detect: emit no extras so `poetry add` still gets a valid string.
            return
        if args.detect_prerelease:
            print(f"Error: pyproject.toml not found in {connector_dir}")
            sys.exit(1)
        print(json.dumps({"error": f"pyproject.toml not found in {connector_dir}"}))
        return

    cdk_info = parse_cdk_dependency(pyproject_path)

    if args.extras_only:
        print(format_extras_for_poetry(cdk_info.get("extras", [])), flush=True)
        return

    print(json.dumps(cdk_info), flush=True)
    if not args.detect_prerelease:
        return

    if "error" in cdk_info:
        # Report the real cause instead of mislabeling it as a pre-release pin.
        print(f"❌ Could not verify the CDK version: {cdk_info['error']}", flush=True, file=sys.stderr)
        sys.exit(1)

    # Only an explicit `False` counts as production-ready; git/path/url
    # dependencies carry no "is_prerelease" key and are rejected here.
    if cdk_info.get("is_prerelease") is not False:
        print(
            "❌ Pre-release CDK version detected.\n"
            "📝 Before merging your PR, remember to run `poe use-cdk-latest` to re-pin to the "
            "latest production CDK version.",
            flush=True,
            file=sys.stderr,
        )
        sys.exit(1)

    print(f"✅ Production ready CDK version: {cdk_info.get('version')}", flush=True, file=sys.stderr)


if __name__ == "__main__":
    main()

# NOTE: the collapsed patch text continues after this script with the diff header
# of the next file, preserved here so it is not lost:
#   diff --git a/poe-tasks/poetry-connector-tasks.toml b/poe-tasks/poetry-connector-tasks.toml
#   index 9e83d5fa192..1634f3f705b 100644
#   --- a/poe-tasks/poetry-connector-tasks.toml
#   +++ b/poe-tasks/poetry-connector-tasks.toml
#   @@ -98,12 +98,36 @@
fix-and-check = [ # Fix everything fixable, then see if checks pass # poe use-cdk-branch 'aj/my-branch-name' # Pin to a specific branch # poe use-cdk-branch-active # Pin to the branch of the local CDK repo +[tool.poe.tasks.detect-cdk-extras] +cmd = "${POE_GIT_DIR}/poe-tasks/detect-python-cdk.py --extras-only" +help = "Detect currently installed CDK extras from pyproject.toml file. Use with -qq to quiet unrelated outputs." + +[tool.poe.tasks.detect-cdk-prerelease] +cmd = "${POE_GIT_DIR}/poe-tasks/detect-python-cdk.py --detect-prerelease" +help = "Check if connector is using non-production CDK references (git/local refs). Returns 0 for production-ready, 1 for prerelease or non-production versions." + +[tool.poe.tasks.detect-cdk-info] +cmd = "${POE_GIT_DIR}/poe-tasks/detect-python-cdk.py" +help = "Get complete CDK dependency information as JSON from pyproject.toml file." + [tool.poe.tasks.use-cdk-latest] -cmd = 'poetry add airbyte-cdk@latest' +shell = ''' +set -eu +EXTRAS=$(poe -qq detect-cdk-extras) +DEP_STR="airbyte-cdk${EXTRAS}@latest" +echo Running: poetry add \"${DEP_STR}\" +poetry add "${DEP_STR}" +''' help = "Pin to the latest version of the CDK." [tool.poe.tasks.use-cdk-version] -cmd = 'poetry add "airbyte-cdk@${VERSION}"' +shell = ''' +set -eu +EXTRAS=$(poe -qq detect-cdk-extras) +DEP_STR="airbyte-cdk${EXTRAS}@${VERSION}" +echo Running: poetry add \"${DEP_STR}\" +poetry add "${DEP_STR}" +''' args = [ { name = "VERSION", positional = true, default = "latest" }, ] @@ -111,20 +135,42 @@ help = "Pin to a specific version of the CDK." [tool.poe.tasks.use-cdk-branch-active] shell = ''' - REPO_ROOT=$(git rev-parse --show-toplevel) - ACTIVE_CDK_BRANCH=$(git -C "$REPO_ROOT/../airbyte-python-cdk" rev-parse --abbrev-ref HEAD) - echo "Attempting to pin CDK to branch '$ACTIVE_CDK_BRANCH' from the local repo." - poetry add "git+https://github.com/airbytehq/airbyte-python-cdk.git#${ACTIVE_CDK_BRANCH}" +set -eu +echo "Detecting active CDK branch..." 
+REPO_ROOT=$(git rev-parse --show-toplevel) +ACTIVE_CDK_BRANCH=$(git -C "$REPO_ROOT/../airbyte-python-cdk" rev-parse --abbrev-ref HEAD) +EXTRAS=$(poe -qq detect-cdk-extras) +DEP_STR="git+https://github.com/airbytehq/airbyte-python-cdk.git@${ACTIVE_CDK_BRANCH}${EXTRAS}" +echo Running: poetry add \"${DEP_STR}\" +poetry add "${DEP_STR}" ''' help = "Pin to the branch of the CDK that is currently checked out locally." [tool.poe.tasks.use-cdk-branch] -cmd = 'poetry add "git+https://github.com/airbytehq/airbyte-python-cdk.git#${BRANCH}"' +shell = ''' +set -eu +EXTRAS=$(poe -qq detect-cdk-extras) +DEP_STR="git+https://github.com/airbytehq/airbyte-python-cdk.git@${BRANCH}${EXTRAS}" +echo Running: poetry add \"${DEP_STR}\" +poetry add "${DEP_STR}" +''' args = [ { name = "BRANCH", positional = true, default = "main" }, ] help = "Pin to a specific branch of the CDK." +[tool.poe.tasks.use-cdk-local] +shell = ''' +set -eu +REPO_ROOT=$(git rev-parse --show-toplevel) +CDK_ROOT=${REPO_ROOT}/../airbyte-python-cdk +EXTRAS=$(poe -qq detect-cdk-extras) +DEP_STR="${CDK_ROOT}${EXTRAS}" +echo Running: poetry add \"${DEP_STR}\" +poetry add "${DEP_STR}" +''' +help = "Pin to your local working copy of the CDK, in editable mode. (Expects that the CDK and airbyte repo are sibling directories.)" + # Generic tasks (same across all connector types) [tool.poe.tasks.get-language]