upload metadata to dev bucket via GHA (#64534)
This commit is contained in:
18
.github/workflows/publish_connectors.yml
vendored
18
.github/workflows/publish_connectors.yml
vendored
@@ -103,6 +103,15 @@ jobs:
|
||||
with:
|
||||
version: 1.8.5
|
||||
|
||||
# We're intentionally not using the `google-github-actions/auth` action.
|
||||
# The upload-connector-metadata step runs a script which handles auth manually.
|
||||
# This is because we're writing files to multiple buckets, using different credentials
|
||||
# for each bucket.
|
||||
# (it's unclear whether that's actually necessary)
|
||||
- name: Install gcloud
|
||||
# v2.1.5
|
||||
uses: google-github-actions/setup-gcloud@6a7c903a70c8625ed6700fa299f5ddb4ca6022e9
|
||||
|
||||
- name: Install metadata_service
|
||||
run: poetry install --directory airbyte-ci/connectors/metadata_service/lib
|
||||
|
||||
@@ -174,6 +183,15 @@ jobs:
|
||||
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url }}
|
||||
max_attempts: 2
|
||||
|
||||
- name: Upload connector metadata
|
||||
id: upload-connector-metadata
|
||||
shell: bash
|
||||
run: ./poe-tasks/upload-connector-metadata.sh --name ${{ matrix.connector }}
|
||||
env:
|
||||
GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||
SPEC_CACHE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||
METADATA_SERVICE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||
|
||||
notify-failure-slack-channel:
|
||||
name: "Notify Slack Channel on Publish Failures"
|
||||
runs-on: ubuntu-24.04
|
||||
|
||||
@@ -3,7 +3,8 @@
|
||||
# You can't just `source lib/util.sh`, because the current working directory probably isn't `poe-tasks`.
|
||||
|
||||
CONNECTORS_DIR="airbyte-integrations/connectors"
|
||||
DOCS_BASE_DIR="docs/integrations"
|
||||
DOCS_ROOT="docs"
|
||||
DOCS_BASE_DIR="$DOCS_ROOT/integrations"
|
||||
METADATA_SERVICE_PATH='airbyte-ci/connectors/metadata_service/lib'
|
||||
|
||||
# Usage: connector_docs_path "source-foo"
|
||||
@@ -50,3 +51,15 @@ generate_dev_tag() {
|
||||
hash=$(git rev-parse --short=10 HEAD)
|
||||
echo "${base}-dev.${hash}"
|
||||
}
|
||||
|
||||
# Authenticate to gcloud using the contents of a variable.
# That variable should contain a JSON-formatted GCP service account key.
#
# Usage: gcloud_activate_service_account "$SOME_CREDENTIALS_VARIABLE"
# Arguments:
#   $1 - the service account key itself (the JSON text, not a path to it).
# Returns:
#   0 on success; non-zero if the key is empty, the temporary key file cannot
#   be created or written, or `gcloud` fails to activate the account.
gcloud_activate_service_account() {
  if [[ -z "${1:-}" ]]; then
    echo "gcloud_activate_service_account: expected a service account key as the first argument" >&2
    return 1
  fi

  # mktemp picks an unpredictable name and creates the file accessible to the
  # current user only, so there is no window where the key file is readable by
  # group/other (and nobody can pre-create or symlink a well-known path).
  local key_file
  key_file=$(mktemp "${TMPDIR:-/tmp}/gcloud_creds.XXXXXX") || return 1

  local status=0
  # printf '%s' writes the key verbatim: backslash sequences such as the \n
  # inside the private key must NOT be rendered into actual newlines.
  if printf '%s\n' "$1" > "$key_file"; then
    gcloud auth activate-service-account --key-file "$key_file" || status=$?
  else
    status=1
  fi

  # gcloud keeps its own copy of the activated credentials, so the key file is
  # only needed for the duration of the command above. Don't leave it on disk.
  rm -f -- "$key_file"
  return "$status"
}
|
||||
|
||||
116
poe-tasks/upload-connector-metadata.sh
Executable file
116
poe-tasks/upload-connector-metadata.sh
Executable file
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Uploads the metadata (+SBOM+spec cache) to GCS.
# Usage: ./poe-tasks/upload-connector-metadata.sh --name destination-bigquery [--pre-release] [--main-release]
# You must have three environment variables set (GCS_CREDENTIALS, METADATA_SERVICE_GCS_CREDENTIALS, SPEC_CACHE_GCS_CREDENTIALS),
# each containing a JSON-formatted GCP service account key.
# SPEC_CACHE_GCS_CREDENTIALS needs write access to `gs://$spec_cache_bucket/specs`.
# METADATA_SERVICE_GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/sbom`.
# GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/metadata`.
#
# Paths such as $CONNECTORS_DIR are relative, so run this from the repository root.

# Locate the helper libraries relative to this script rather than the current
# working directory. `dirname` (unlike `${BASH_SOURCE%/*}`) also works when the
# script is invoked without a slash in its path, e.g. `bash upload-connector-metadata.sh`.
source "$(dirname -- "${BASH_SOURCE[0]}")/lib/util.sh"

# NOTE(review): parse_args.sh is presumably what defines `get_only_connector`
# and `publish_mode` (both are used below but not defined in this file) - confirm.
source "$(dirname -- "${BASH_SOURCE[0]}")/lib/parse_args.sh"
connector=$(get_only_connector)

# Fail fast if any credential is missing or empty.
# `${!name:-}` expands the variable whose name is stored in `name`, defaulting
# to empty, so an unset variable reaches our error message instead of tripping
# `set -u` with a bare "unbound variable" error.
for required_var in SPEC_CACHE_GCS_CREDENTIALS METADATA_SERVICE_GCS_CREDENTIALS GCS_CREDENTIALS; do
  if [[ -z "${!required_var:-}" ]]; then
    echo "$required_var environment variable must be set" >&2
    exit 1
  fi
done

# Both the spec cache and the metadata currently go to the same (dev) bucket.
spec_cache_bucket="dev-airbyte-cloud-connector-metadata-service"
metadata_bucket="dev-airbyte-cloud-connector-metadata-service"

syft_docker_image="anchore/syft:v1.6.0"
sbom_extension="spdx.json"

meta="${CONNECTORS_DIR}/${connector}/metadata.yaml"
# NOTE(review): `doc` is not referenced again in this script; the call is kept
# because, under `set -e`, a failure of connector_docs_path aborts the run.
doc="$(connector_docs_path "$connector")"

# yq prints the literal string "null" for a missing key, so check for that too.
docker_repository=$(yq -r '.data.dockerRepository' "$meta")
if [[ -z "$docker_repository" || "$docker_repository" == "null" ]]; then
  echo "Error: docker_repository missing in ${meta}" >&2
  exit 1
fi

# Figure out the tag that we're working on (i.e. handle the prerelease case)
base_tag=$(yq -r '.data.dockerImageTag' "$meta")
if [[ -z "$base_tag" || "$base_tag" == "null" ]]; then
  echo "Error: dockerImageTag missing in ${meta}" >&2
  exit 1
fi
# A main release uses the tag from metadata.yaml as-is; anything else gets a
# dev tag derived from it by generate_dev_tag.
if [[ "$publish_mode" == "main-release" ]]; then
  docker_tag="$base_tag"
else
  docker_tag=$(generate_dev_tag "$base_tag")
fi

full_docker_image="$docker_repository:$docker_tag"
|
||||
|
||||
# Upload the specs to the spec cache
|
||||
# Runs the connector image's `spec` command and saves the emitted spec as
# compact JSON (one spec per line).
#
# Usage: run_connector_spec <deployment_mode> <output_file>
# Globals:
#   full_docker_image (read) - the image to run.
# Arguments:
#   $1 - value passed to the container as DEPLOYMENT_MODE (e.g. OSS or CLOUD).
#   $2 - path of the file that receives the spec.
# Exits the script with status 1 unless the connector emitted exactly one
# SPEC message.
run_connector_spec() {
  local deployment_mode=$1
  local output_file=$2

  # Run the spec command, filter for SPEC messages, and write those messages to the output file.
  # The jq command has a lot going on:
  # * --raw-input is needed, because many connectors emit some log messages in non-JSON format
  # * then we use `fromjson?` to filter for valid JSON messages
  # * and then we select any spec message (i.e. {"type": "SPEC", "spec": {...}});
  #   `.type?` (rather than `.type`) quietly skips lines that are valid JSON but
  #   not objects (e.g. a log line that is just a number), instead of raising a
  #   jq error that would fail the whole pipeline
  # * and then we extract just the `spec` field.
  docker run --env "DEPLOYMENT_MODE=$deployment_mode" "$full_docker_image" spec \
    | jq --raw-input --compact-output 'fromjson? | select(.type? == "SPEC").spec' \
    > "$output_file"

  # Verify that we had exactly one spec message.
  # Depending on the platform, `wc -l` may return a padded string like "       1".
  # `tr -d ' '` deletes those spaces.
  # (Declared separately from the assignment so that `local` doesn't mask a failure.)
  local spec_message_count
  spec_message_count=$(wc -l < "$output_file" | tr -d ' ')
  if [[ "$spec_message_count" -ne 1 ]]; then
    echo "Expected to get exactly one spec message from the connector when running with deployment mode '$deployment_mode'; got $spec_message_count" >&2
    exit 1
  fi
}
|
||||
echo '--- UPLOADING SPEC TO SPEC CACHE ---'
# The spec is fetched once per deployment mode; spec.json / spec.cloud.json are
# written to the current working directory.
echo 'Running spec for OSS...'
run_connector_spec OSS spec.json
echo 'Running spec for CLOUD...'
run_connector_spec CLOUD spec.cloud.json
spec_cache_base_path="gs://$spec_cache_bucket/specs/$docker_repository/$docker_tag"
gcloud_activate_service_account "$SPEC_CACHE_GCS_CREDENTIALS"
gsutil cp spec.json "$spec_cache_base_path/spec.json"
# Only upload spec.cloud.json if it's different from spec.json.
# `diff` exits 0 when the files are identical and non-zero otherwise, so
# `! diff` is the "files differ" branch. (diff also prints the differences,
# which ends up in the job log.)
if ! diff spec.json spec.cloud.json; then
  gsutil cp spec.cloud.json "$spec_cache_base_path/spec.cloud.json"
fi

# Upload the SBOM
echo '--- UPLOADING SBOM ---'
# Run syft from its docker image against the connector image. The local docker
# config is mounted into the container and DOCKER_CONFIG points at it.
# The SBOM is written to a local file named after $sbom_extension ("spdx.json").
docker run \
  --volume "$HOME/.docker/config.json:/config/config.json" \
  --env DOCKER_CONFIG=/config \
  "$syft_docker_image" \
  -o spdx-json \
  "$full_docker_image" > "$sbom_extension"
gcloud_activate_service_account "$METADATA_SERVICE_GCS_CREDENTIALS"
gsutil cp "$sbom_extension" "gs://$metadata_bucket/sbom/$docker_repository/$docker_tag.$sbom_extension"

# Upload the metadata
# `metadata_service upload` skips the upload if the metadata already exists in GCS.
echo '--- UPLOADING METADATA ---'
# Build the command as an array so that the optional flag is passed as proper
# separate arguments without relying on unquoted word-splitting.
metadata_upload_cmd=(
  poetry run --directory "$METADATA_SERVICE_PATH"
  metadata_service upload "$meta" "$DOCS_ROOT/" "$metadata_bucket"
)
if [[ "$publish_mode" != "main-release" ]]; then
  # yes, it's --prerelease and not --pre-release
  metadata_upload_cmd+=(--prerelease "$docker_tag")
fi
# Under the hood, this reads the GCS_CREDENTIALS environment variable
"${metadata_upload_cmd[@]}"
|
||||
Reference in New Issue
Block a user