1
0
mirror of synced 2025-12-19 18:14:56 -05:00

upload metadata to dev bucket via GHA (#64534)

This commit is contained in:
Edward Gao
2025-08-11 10:10:30 -07:00
committed by GitHub
parent be4d91cb1f
commit 903a3921cb
3 changed files with 148 additions and 1 deletions

View File

@@ -103,6 +103,15 @@ jobs:
with:
version: 1.8.5
# We're intentionally not using the `google-github-actions/auth` action.
# The upload-connector-metadata step runs a script which handles auth manually.
# This is because we're writing files to multiple buckets, using different credentials
# for each bucket.
# (it's unclear whether that's actually necessary)
- name: Install gcloud
# v2.1.5
uses: google-github-actions/setup-gcloud@6a7c903a70c8625ed6700fa299f5ddb4ca6022e9
- name: Install metadata_service
run: poetry install --directory airbyte-ci/connectors/metadata_service/lib
@@ -174,6 +183,15 @@ jobs:
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url }}
max_attempts: 2
- name: Upload connector metadata
id: upload-connector-metadata
shell: bash
run: ./poe-tasks/upload-connector-metadata.sh --name ${{ matrix.connector }}
env:
GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
SPEC_CACHE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
METADATA_SERVICE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
notify-failure-slack-channel:
name: "Notify Slack Channel on Publish Failures"
runs-on: ubuntu-24.04

View File

@@ -3,7 +3,8 @@
# You can't just `source lib/util.sh`, because the current working directory probably isn't `poe-tasks`.
CONNECTORS_DIR="airbyte-integrations/connectors"
DOCS_BASE_DIR="docs/integrations"
DOCS_ROOT="docs"
DOCS_BASE_DIR="$DOCS_ROOT/integrations"
METADATA_SERVICE_PATH='airbyte-ci/connectors/metadata_service/lib'
# Usage: connector_docs_path "source-foo"
@@ -50,3 +51,15 @@ generate_dev_tag() {
hash=$(git rev-parse --short=10 HEAD)
echo "${base}-dev.${hash}"
}
# Authenticate to gcloud using the contents of a variable.
# That variable should contain a JSON-formatted GCP service account key.
#
# Arguments: $1 - the service account key JSON itself (not a path to a file)
# Returns:   the exit status of `gcloud auth activate-service-account`, or
#            non-zero if the temporary key file could not be created/written.
gcloud_activate_service_account() {
  local key_file
  local status=0

  # Use mktemp rather than a fixed name under /tmp: the name is unpredictable
  # and the file is created readable/writable by the current user only, so
  # there is no window in which another user can read (or pre-create) it.
  key_file=$(mktemp) || return

  # printf '%s' writes the key verbatim; in particular the literal `\n`
  # sequences inside the JSON are NOT turned into real newlines.
  printf '%s\n' "$1" > "$key_file" || status=$?

  if [[ "$status" -eq 0 ]]; then
    gcloud auth activate-service-account --key-file "$key_file" || status=$?
  fi

  # Don't leave the private key lying around on disk once gcloud has read it.
  # NOTE(review): assumes nothing else reads the key file after activation
  # (gcloud imports the key into its own credential store) - confirm.
  rm -f -- "$key_file"
  return "$status"
}

View File

@@ -0,0 +1,116 @@
#!/usr/bin/env bash
set -euo pipefail
# Uploads the metadata (+SBOM+spec cache) to GCS.
# Usage: ./poe-tasks/upload-connector-metadata.sh --name destination-bigquery [--pre-release] [--main-release]
# You must have three environment variables set (GCS_CREDENTIALS, METADATA_SERVICE_GCS_CREDENTIALS, SPEC_CACHE_GCS_CREDENTIALS),
# each containing a JSON-formatted GCP service account key.
# SPEC_CACHE_GCS_CREDENTIALS needs write access to `gs://$spec_cache_bucket/specs`.
# METADATA_SERVICE_GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/sbom`.
# GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/metadata`.
source "${BASH_SOURCE%/*}/lib/util.sh"
source "${BASH_SOURCE%/*}/lib/parse_args.sh"
# Resolve which connector we're operating on (helper comes from the sourced libs).
connector=$(get_only_connector)

# Fail fast, with a readable message, if any credential is missing.
# `${!name:-}` is used instead of a bare "$NAME" because this script runs under
# `set -u`: expanding an unset variable directly would abort with bash's generic
# "unbound variable" error before our own message could be printed.
for required_var in SPEC_CACHE_GCS_CREDENTIALS METADATA_SERVICE_GCS_CREDENTIALS GCS_CREDENTIALS; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "${required_var} environment variable must be set" >&2
    exit 1
  fi
done

# Both the spec cache and the metadata currently live in the same dev bucket.
spec_cache_bucket="dev-airbyte-cloud-connector-metadata-service"
metadata_bucket="dev-airbyte-cloud-connector-metadata-service"

syft_docker_image="anchore/syft:v1.6.0"
sbom_extension="spdx.json"

meta="${CONNECTORS_DIR}/${connector}/metadata.yaml"
# NOTE(review): `doc` is not read again in the part of this script visible here;
# the call is kept so behavior is unchanged.
doc="$(connector_docs_path "$connector")"

# yq prints the literal string "null" when the key is absent, so check for both.
docker_repository=$(yq -r '.data.dockerRepository' "$meta")
if [[ -z "$docker_repository" || "$docker_repository" == "null" ]]; then
  echo "Error: docker_repository missing in ${meta}" >&2
  exit 1
fi

# Figure out the tag that we're working on (i.e. handle the prerelease case)
base_tag=$(yq -r '.data.dockerImageTag' "$meta")
if [[ -z "$base_tag" || "$base_tag" == "null" ]]; then
  echo "Error: dockerImageTag missing in ${meta}" >&2
  exit 1
fi

# Main releases use the tag from metadata.yaml verbatim; everything else gets
# a dev tag derived from it by generate_dev_tag.
if [[ "$publish_mode" == "main-release" ]]; then
  docker_tag="$base_tag"
else
  docker_tag=$(generate_dev_tag "$base_tag")
fi

full_docker_image="$docker_repository:$docker_tag"
# Upload the specs to the spec cache
# Runs the connector image's `spec` command and writes the resulting spec to a file.
#
# Globals:   full_docker_image (read) - the image to run
# Arguments: $1 - value for the DEPLOYMENT_MODE env var passed to the container
#            $2 - path of the file to write the extracted spec to
# Outputs:   the compact-JSON spec in $2; an error message on stderr on failure
# Exits:     with status 1 unless exactly one SPEC message was emitted
run_connector_spec() {
  local deployment_mode=$1
  local output_file=$2
  local spec_message_count

  # Run the spec command, filter for SPEC messages, and write those messages to the output file.
  # The jq command has a lot going on:
  # * --raw-input is needed, because many connectors emit some log messages in non-JSON format
  # * then we use `fromjson?` to filter for valid JSON messages
  # * `.type?` (rather than `.type`) skips lines that are valid JSON but not objects
  #   (e.g. a log line that is just a number); indexing those would make jq fail
  # * and then we select any spec message (i.e. {"type": "SPEC", "spec": {...}})
  # * and then we extract just the `spec` field.
  docker run --env "DEPLOYMENT_MODE=${deployment_mode}" "$full_docker_image" spec \
    | jq --raw-input --compact-output 'fromjson? | select(.type? == "SPEC").spec' \
    > "$output_file"

  # Verify that we had exactly one spec message.
  # Depending on the platform, `wc -l` may return a right-padded string like "       1".
  # `tr -d ' '` deletes those spaces.
  spec_message_count=$(wc -l < "$output_file" | tr -d ' ')
  if [[ "$spec_message_count" -ne 1 ]]; then
    echo "Expected to get exactly one spec message from the connector when running with deployment mode '$deployment_mode'; got $spec_message_count" >&2
    exit 1
  fi
}
echo '--- UPLOADING SPEC TO SPEC CACHE ---'
echo 'Running spec for OSS...'
run_connector_spec OSS spec.json
echo 'Running spec for CLOUD...'
run_connector_spec CLOUD spec.cloud.json

spec_cache_base_path="gs://$spec_cache_bucket/specs/$docker_repository/$docker_tag"
gcloud_activate_service_account "$SPEC_CACHE_GCS_CREDENTIALS"
gsutil cp spec.json "$spec_cache_base_path/spec.json"
# Only upload spec.cloud.json if it's different from spec.json.
# `diff` exits 0 when the files are identical and non-zero otherwise, hence `! diff`.
if ! diff spec.json spec.cloud.json; then
  gsutil cp spec.cloud.json "$spec_cache_base_path/spec.cloud.json"
fi

# Upload the SBOM
echo '--- UPLOADING SBOM ---'
# The local docker config is mounted into the syft container and pointed to
# via DOCKER_CONFIG. The SBOM (SPDX JSON) is written to a local file named
# after $sbom_extension.
docker run \
  --volume "$HOME/.docker/config.json:/config/config.json" \
  --env DOCKER_CONFIG=/config \
  "$syft_docker_image" \
  -o spdx-json \
  "$full_docker_image" > "$sbom_extension"
gcloud_activate_service_account "$METADATA_SERVICE_GCS_CREDENTIALS"
gsutil cp "$sbom_extension" "gs://$metadata_bucket/sbom/$docker_repository/$docker_tag.$sbom_extension"

# Upload the metadata
# `metadata_service upload` skips the upload if the metadata already exists in GCS.
echo '--- UPLOADING METADATA ---'
# Build the argument list as an array instead of relying on word-splitting of
# an unquoted string. The array always holds the three positional arguments,
# so expanding it is safe under `set -u` even on old bash versions.
metadata_upload_args=("$meta" "$DOCS_ROOT/" "$metadata_bucket")
if [[ "$publish_mode" != "main-release" ]]; then
  # yes, it's --prerelease and not --pre-release
  metadata_upload_args+=(--prerelease "$docker_tag")
fi
# Under the hood, this reads the GCS_CREDENTIALS environment variable
poetry run --directory "$METADATA_SERVICE_PATH" metadata_service upload "${metadata_upload_args[@]}"