Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
227 lines
6.6 KiB
Bash
Executable File
227 lines
6.6 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Finds all modified connector directories in the Airbyte repository.
#
# The current branch is compared with the default branch, and files that match
# certain ignore patterns are filtered out before connector names are derived.
#
# Optional flags (the bare word without the leading dashes is also accepted,
# except for --prev-commit / --compare-prev):
#   --java          list only connectors tagged as Java in metadata.yaml
#   --no-java       list only connectors that are not tagged as Java
#   --json          print a GitHub Actions matrix object instead of plain lines
#   --prev-commit   inspect only the files touched by the HEAD commit
#   --compare-prev  alias for --prev-commit
#   --local-cdk     also include Gradle connectors that set cdk = 'local'

# Strict mode: stop on errors, on unset variables, and on failures inside pipelines.
set -o errexit -o nounset -o pipefail

# 0) Collect arguments
DEFAULT_BRANCH="master"

# Every option is a boolean switch that starts out disabled.
JAVA=false
NO_JAVA=false
JSON=false
PREV_COMMIT=false
LOCAL_CDK=false

# parse flags
for flag in "$@"; do
  case "$flag" in
    --java | java) JAVA=true ;;
    --no-java | no-java) NO_JAVA=true ;;
    --json | json) JSON=true ;;
    --prev-commit | --compare-prev) PREV_COMMIT=true ;;
    --local-cdk | local-cdk) LOCAL_CDK=true ;;
    *)
      echo "Unknown argument: $flag" >&2
      exit 1
      ;;
  esac
done
# Every argument has been consumed; leave the positional parameters empty.
set --

# 1) Fetch the latest from the default branch (using the correct remote)
# Use the 'upstream' remote when one is configured; otherwise fall back to 'origin'.
REMOTE="origin"
if git remote get-url upstream >/dev/null 2>&1; then
  REMOTE="upstream"
fi
git fetch --quiet "$REMOTE" "$DEFAULT_BRANCH"

# 2) set up ignore patterns
# Each entry is an extended regular expression (not a glob) matched against the
# end of a changed path, so literal dots must be escaped to avoid matching any
# character in that position.
ignore_patterns=(
  '\.coveragerc'
  'poe_tasks\.toml'
  'airbyte-integrations/connectors/[^/]+/README\.md'
)
# join with | into a grouped regex that is anchored at the end of the path
# (printf is used so the backslashes are passed through untouched)
ignore_globs="($(IFS='|'; printf '%s' "${ignore_patterns[*]}"))$"

# 3) collect all file changes
# Working-tree lists stay empty unless the PR-branch comparison below fills them.
staged=""
unstaged=""
untracked=""
if $PREV_COMMIT; then
  # Compare only the last commit; diff-tree is faster and more precise.
  # Intended for master, where we diff the current squashed commit against the previous squashed commit.
  committed=$(git diff-tree --no-commit-id -r --name-only HEAD)
else
  # Default behavior
  # This is for a PR branch.
  # The default branch was already fetched in step 1, so it is not fetched a
  # second time here; the three-dot diff lists what HEAD changed since it
  # diverged from that branch.
  committed=$(git diff --name-only "${REMOTE}/${DEFAULT_BRANCH}"...HEAD)
  staged=$(git diff --cached --name-only)
  unstaged=$(git diff --name-only)
  untracked=$(git ls-files --others --exclude-standard)
fi

# 4) merge into one list
# One path per line; the command substitution trims the trailing newlines.
all_changes=$(printf '%s\n' "$committed" "$staged" "$unstaged" "$untracked")

# 4.5) Define helper function to return empty JSON when no connectors are found
#
# Reads the global JSON flag and then terminates the script with status 0.
# In JSON mode the matrix is emitted with a single empty-string entry, so the
# matrix still runs once as a no-op and can be marked as complete for purposes
# of required checks. In plain mode nothing is printed.
return_empty_json() {
  case "$JSON" in
    true)
      printf '%s\n' '{"connector": [""]}'
      ;;
  esac
  exit 0
}

# 5) drop ignored files

# Prints the lines of $1 that do not match the ignore regex held in the global
# ignore_globs (tried both after a '/' and at the start of the path).
# grep exits with 1 when it selects no lines, which simply means every file was
# ignored; that case is reported as success so that errexit/pipefail do not
# abort the script. Exit codes above 1 are real grep errors and are passed on.
drop_ignored_files() {
  local rc=0
  printf '%s\n' "$1" \
    | grep -v -E "(/${ignore_globs}|^${ignore_globs})" \
    || rc=$?
  if (( rc > 1 )); then
    return "$rc"
  fi
  return 0
}

filtered=$(drop_ignored_files "$all_changes")
if [[ -z "$filtered" ]]; then
  echo "⚠️ Warning: No files remaining after filtering. Returning empty connector list." >&2
  return_empty_json
fi

# 6) keep only connector paths
# Only paths under a source-* or destination-* connector directory are kept.
# A failing grep (for example when nothing matches) is tolerated here so the
# empty result can be handled explicitly below.
connectors_paths=$(grep -E '^airbyte-integrations/connectors/(source-[^/]+|destination-[^/]+)(/|$)' <<< "$filtered") || true
if [[ -z "$connectors_paths" ]]; then
  echo "⚠️ Warning: No connector paths found. Returning empty connector list." >&2
  return_empty_json
fi

# 7) extract just the connector directory name
# Replaces "airbyte-integrations/connectors/<name>/..." with "<name>" on every line.
dirs=$(sed -E 's|airbyte-integrations/connectors/([^/]+).*|\1|' <<< "$connectors_paths")
if [[ -z "$dirs" ]]; then
  echo "⚠️ Warning: Failed to extract connector directories. Returning empty connector list." >&2
  return_empty_json
fi

# 8) unique list of modified connectors
# Names are sorted and de-duplicated; a name is kept only when its connector
# directory exists relative to the current working directory.
connectors=()
if [[ -n "$dirs" ]]; then
  while IFS= read -r name; do
    if [[ ! -d "airbyte-integrations/connectors/${name}" ]]; then
      echo "⚠️ '$name' directory was not found. This can happen if a connector is removed. Skipping." >&2
      continue
    fi
    connectors+=("$name")
  done < <(printf '%s\n' "$dirs" | sort -u)
fi

# 9) Define function to print either JSON or newline-delimited list.
# JSON will be in GitHub Actions Matrix format: {"connector":[...]}
#
# Arguments: the connector names to print (possibly none).
# Globals:   JSON (read) selects the output format.
# Outputs:   plain mode - one name per line on stdout;
#            JSON mode  - a compact object with 'connector' as the matrix key.
print_list() {
  if [[ "$JSON" == true ]]; then
    if (( $# == 0 )); then
      # Nothing to list: emit the placeholder matrix (this exits the script).
      return_empty_json
    else
      # Turn each name into a JSON string, then collect them into one array
      # stored under the 'connector' key.
      printf '%s\n' "$@" | jq -R . | jq -cs '{connector: .}'
    fi
    return
  fi

  # Plain mode: newline-delimited list.
  local entry
  for entry in "$@"; do
    echo "$entry"
  done
}

# Allow empty arrays without 'unbound variable' error from here on out.
set +u

# Sorts and de-duplicates the global 'connectors' array in place.
# An empty array is left untouched: printf '%s\n' with no arguments still emits
# one blank line, which mapfile would turn into a bogus "" connector entry.
dedupe_connectors() {
  if (( ${#connectors[@]} == 0 )); then
    return 0
  fi
  mapfile -t connectors < <(printf '%s\n' "${connectors[@]}" | sort -u)
}

# 10) If --local-cdk flag is set, also add connectors whose Gradle build file
# mentions airbyteBulkConnector and sets cdk = 'local', regardless of changes.
if $LOCAL_CDK; then
  echo "Finding Java Bulk CDK connectors with version = local..." >&2

  for connector_dir in airbyte-integrations/connectors/*; do
    [[ -d "$connector_dir" ]] || continue

    # A Java connector has a Gradle build file; the Kotlin DSL variant wins
    # when both are present. Directories without either file are skipped.
    if [[ -f "$connector_dir/build.gradle.kts" ]]; then
      build_file="build.gradle.kts"
    elif [[ -f "$connector_dir/build.gradle" ]]; then
      build_file="build.gradle"
    else
      continue
    fi

    # Search for cdk = 'local' or cdk = "local" plus a mention of
    # airbyteBulkConnector. Both are plain whole-file searches; the match is
    # not restricted to the inside of the airbyteBulkConnector block.
    if grep -q "airbyteBulkConnector" "$connector_dir/$build_file" \
      && grep -q "cdk *= *['\"]local['\"]" "$connector_dir/$build_file"; then
      connectors+=("${connector_dir##*/}")
    fi
  done

  # Connectors found here may already be in the list because they were modified.
  dedupe_connectors
fi

# 11) Print all if no filters applied
# With neither --java nor --no-java given, the complete list is the answer.
if [[ "$JAVA" != true && "$NO_JAVA" != true ]]; then
  print_list "${connectors[@]}"
  exit 0
fi

# 11b) scan metadata.yaml to identify java connectors
# A connector counts as Java when its metadata.yaml contains the text
# 'language:java'. Connectors without a metadata.yaml are reported on stderr
# and left out of the Java list.
java_connectors=()
for connector in "${connectors[@]}"; do
  metadata_file="airbyte-integrations/connectors/${connector}/metadata.yaml"
  if [[ -f "$metadata_file" ]]; then
    if grep -qE 'language:java' "$metadata_file"; then
      java_connectors+=("$connector")
    fi
  else
    echo "⚠️ metadata.yaml not found for '$connector' (looking at $metadata_file)" >&2
  fi
done

if [[ "$JAVA" == true ]]; then
  set +u # Allow empty array without 'unbound variable' error
  print_list "${java_connectors[@]}"
  exit 0
fi

# 12) derive non-java by subtraction
# Membership in java_connectors is checked with an exact string comparison in
# the shell itself. This avoids forking a printf | grep pipeline for every
# connector, whose exit status under 'pipefail' can be non-zero even on a match
# if grep -q exits before printf has finished writing.
non_java_connectors=()
for c in "${connectors[@]}"; do
  is_java=false
  for java_connector in "${java_connectors[@]}"; do
    if [[ "$java_connector" == "$c" ]]; then
      is_java=true
      break
    fi
  done
  if [[ "$is_java" == false ]]; then
    non_java_connectors+=("$c")
  fi
done

if $NO_JAVA; then
  print_list "${non_java_connectors[@]}"
  exit 0
fi

# We should never reach here
echo "⚠️ Unknown error occurred. Please check the script." >&2
exit 1