upload metadata to dev bucket via GHA (#64534)
This commit is contained in:
18
.github/workflows/publish_connectors.yml
vendored
18
.github/workflows/publish_connectors.yml
vendored
@@ -103,6 +103,15 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
version: 1.8.5
|
version: 1.8.5
|
||||||
|
|
||||||
|
# We're intentionally not using the `google-github-actions/auth` action.
|
||||||
|
# The upload-connector-metadata step runs a script which handles auth manually.
|
||||||
|
# This is because we're writing files to multiple buckets, using different credentials
|
||||||
|
# for each bucket.
|
||||||
|
# (it's unclear whether that's actually necessary)
|
||||||
|
- name: Install gcloud
|
||||||
|
# v2.1.5
|
||||||
|
uses: google-github-actions/setup-gcloud@6a7c903a70c8625ed6700fa299f5ddb4ca6022e9
|
||||||
|
|
||||||
- name: Install metadata_service
|
- name: Install metadata_service
|
||||||
run: poetry install --directory airbyte-ci/connectors/metadata_service/lib
|
run: poetry install --directory airbyte-ci/connectors/metadata_service/lib
|
||||||
|
|
||||||
@@ -174,6 +183,15 @@ jobs:
|
|||||||
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url }}
|
airbyte_ci_binary_url: ${{ inputs.airbyte_ci_binary_url }}
|
||||||
max_attempts: 2
|
max_attempts: 2
|
||||||
|
|
||||||
|
- name: Upload connector metadata
|
||||||
|
id: upload-connector-metadata
|
||||||
|
shell: bash
|
||||||
|
run: ./poe-tasks/upload-connector-metadata.sh --name ${{ matrix.connector }}
|
||||||
|
env:
|
||||||
|
GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||||
|
SPEC_CACHE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||||
|
METADATA_SERVICE_GCS_CREDENTIALS: ${{ secrets.METADATA_SERVICE_DEV_GCS_CREDENTIALS }}
|
||||||
|
|
||||||
notify-failure-slack-channel:
|
notify-failure-slack-channel:
|
||||||
name: "Notify Slack Channel on Publish Failures"
|
name: "Notify Slack Channel on Publish Failures"
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
|
|||||||
@@ -3,7 +3,8 @@
|
|||||||
# You can't just `source lib/util.sh`, because the current working directory probably isn't `poe-tasks`.
|
# You can't just `source lib/util.sh`, because the current working directory probably isn't `poe-tasks`.
|
||||||
|
|
||||||
# Well-known repository paths shared by the poe-tasks scripts.
CONNECTORS_DIR='airbyte-integrations/connectors'
DOCS_ROOT='docs'
DOCS_BASE_DIR="${DOCS_ROOT}/integrations"
METADATA_SERVICE_PATH='airbyte-ci/connectors/metadata_service/lib'
|
|
||||||
# Usage: connector_docs_path "source-foo"
|
# Usage: connector_docs_path "source-foo"
|
||||||
@@ -50,3 +51,15 @@ generate_dev_tag() {
|
|||||||
hash=$(git rev-parse --short=10 HEAD)
|
hash=$(git rev-parse --short=10 HEAD)
|
||||||
echo "${base}-dev.${hash}"
|
echo "${base}-dev.${hash}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Authenticate to gcloud using the contents of a variable.
# That variable should contain a JSON-formatted GCP service account key.
# Usage: gcloud_activate_service_account "$SOME_CREDENTIALS_VAR"
gcloud_activate_service_account() {
  local creds_file
  # mktemp creates the file with mode 0600 (only the current user can read it)
  # and an unpredictable name, avoiding the clobber/symlink exposure of a
  # fixed, world-known /tmp path.
  creds_file=$(mktemp /tmp/gcloud_creds.XXXXXX)
  # printf '%s\n' writes the key verbatim (plus a trailing newline, matching
  # the previous `echo -E` behavior): unlike echo, printf never interprets
  # backslash escapes, so literal \n sequences inside the JSON stay literal.
  printf '%s\n' "$1" > "$creds_file"
  gcloud auth activate-service-account --key-file "$creds_file"
}
|
|||||||
116
poe-tasks/upload-connector-metadata.sh
Executable file
116
poe-tasks/upload-connector-metadata.sh
Executable file
@@ -0,0 +1,116 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Uploads the metadata (+SBOM+spec cache) to GCS.
# Usage: ./poe-tasks/upload-connector-metadata.sh --name destination-bigquery [--pre-release] [--main-release]
# You must have three environment variables set (GCS_CREDENTIALS, METADATA_SERVICE_GCS_CREDENTIALS, SPEC_CACHE_GCS_CREDENTIALS),
# each containing a JSON-formatted GCP service account key.
# SPEC_CACHE_GCS_CREDENTIALS needs write access to `gs://$spec_cache_bucket/specs`.
# METADATA_SERVICE_GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/sbom`.
# GCS_CREDENTIALS needs write access to `gs://$metadata_bucket/metadata`.

source "${BASH_SOURCE%/*}/lib/util.sh"

source "${BASH_SOURCE%/*}/lib/parse_args.sh"
connector=$(get_only_connector)

# Fail fast with a readable message if any credential variable is missing.
# `${!var:-}` (indirect expansion with a default) is required under `set -u`:
# a plain `test "$SPEC_CACHE_GCS_CREDENTIALS"` would abort with bash's own
# "unbound variable" error before our message could ever be printed.
for var in SPEC_CACHE_GCS_CREDENTIALS METADATA_SERVICE_GCS_CREDENTIALS GCS_CREDENTIALS; do
  if test -z "${!var:-}"; then
    echo "$var environment variable must be set" >&2
    exit 1
  fi
done

spec_cache_bucket="dev-airbyte-cloud-connector-metadata-service"
metadata_bucket="dev-airbyte-cloud-connector-metadata-service"

syft_docker_image="anchore/syft:v1.6.0"
sbom_extension="spdx.json"

meta="${CONNECTORS_DIR}/${connector}/metadata.yaml"
# Quoted: connector names shouldn't contain IFS characters, but don't rely on it.
doc="$(connector_docs_path "$connector")"

docker_repository=$(yq -r '.data.dockerRepository' "$meta")
if test -z "$docker_repository" || test "$docker_repository" = "null"; then
  echo "Error: docker_repository missing in ${meta}" >&2
  exit 1
fi

# Figure out the tag that we're working on (i.e. handle the prerelease case)
base_tag=$(yq -r '.data.dockerImageTag' "$meta")
if test -z "$base_tag" || test "$base_tag" = "null"; then
  echo "Error: dockerImageTag missing in ${meta}" >&2
  exit 1
fi
if test "$publish_mode" = "main-release"; then
  docker_tag="$base_tag"
else
  docker_tag=$(generate_dev_tag "$base_tag")
fi

full_docker_image="$docker_repository:$docker_tag"
|
# Upload the specs to the spec cache
# Usage: run_connector_spec <OSS|CLOUD> <output_file>
# Runs the connector's `spec` command under the given deployment mode and
# writes exactly one SPEC message (as JSON) to the output file; exits the
# script if the connector produced zero or multiple SPEC messages.
run_connector_spec() {
  local deployment_mode=$1
  local output_file=$2
  local spec_message_count

  # Run the spec command, filter for SPEC messages, and write those messages to the output file.
  # The jq command has a lot going on:
  # * --raw-input is needed, because many connectors emit some log messages in non-JSON format
  # * then we use `fromjson?` to filter for valid JSON messages
  # * and then we select any spec message (i.e. {"type": "SPEC", "spec": {...}})
  # * and then we extract just the `spec` field.
  docker run --env DEPLOYMENT_MODE="$deployment_mode" "$full_docker_image" spec \
    | jq --raw-input --compact-output 'fromjson? | select(.type == "SPEC").spec' > "$output_file"

  # Verify that we had exactly one spec message.
  # Depending on the platform, `wc -l` may return a right-padded string like "       1".
  # `tr -d ' '` deletes those spaces.
  # (assignment is separate from `local` so a failing pipeline isn't masked,
  # and `wc -l < file` avoids a useless `cat`.)
  spec_message_count=$(wc -l < "$output_file" | tr -d ' ')
  if test "$spec_message_count" -ne 1; then
    echo "Expected to get exactly one spec message from the connector when running with deployment mode '$deployment_mode'; got $spec_message_count" >&2
    exit 1
  fi
}
|
echo '--- UPLOADING SPEC TO SPEC CACHE ---'
echo 'Running spec for OSS...'
run_connector_spec OSS spec.json
echo 'Running spec for CLOUD...'
run_connector_spec CLOUD spec.cloud.json
spec_cache_base_path="gs://$spec_cache_bucket/specs/$docker_repository/$docker_tag"
gcloud_activate_service_account "$SPEC_CACHE_GCS_CREDENTIALS"
gsutil cp spec.json "$spec_cache_base_path/spec.json"
# Only upload spec.cloud.json if it's different from spec.json.
# `cmp -s` exits 0 when the files are byte-identical, so we need `!` to detect
# a difference; unlike `diff`, it stays silent instead of dumping the whole
# diff to stdout, and stops at the first differing byte.
if ! cmp -s spec.json spec.cloud.json; then
  gsutil cp spec.cloud.json "$spec_cache_base_path/spec.cloud.json"
fi
|
|
||||||
|
# Upload the SBOM
echo '--- UPLOADING SBOM ---'
# Syft needs the host docker credentials to pull the connector image.
# The --volume argument is quoted so a $HOME containing spaces can't be
# word-split into multiple arguments.
docker run \
  --volume "$HOME/.docker/config.json:/config/config.json" \
  --env DOCKER_CONFIG=/config \
  "$syft_docker_image" \
  -o spdx-json \
  "$full_docker_image" > "$sbom_extension"
gcloud_activate_service_account "$METADATA_SERVICE_GCS_CREDENTIALS"
gsutil cp "$sbom_extension" "gs://$metadata_bucket/sbom/$docker_repository/$docker_tag.$sbom_extension"
|
# Upload the metadata
# `metadata_service upload` skips the upload if the metadata already exists in GCS.
echo '--- UPLOADING METADATA ---'
# Build the optional flag as an array: it expands to exactly zero or two
# arguments without relying on unquoted word-splitting (and stays correct
# even if the tag ever contained IFS characters).
metadata_upload_prerelease_flag=()
if test "$publish_mode" != "main-release"; then
  # yes, it's --prerelease and not --pre-release
  metadata_upload_prerelease_flag=(--prerelease "$docker_tag")
fi
# Under the hood, this reads the GCS_CREDENTIALS environment variable.
# The `${arr[@]+...}` guard makes expanding the (possibly empty) array safe
# under `set -u` on bash < 4.4, where "${arr[@]}" on an empty array errors.
poetry run --directory "$METADATA_SERVICE_PATH" metadata_service upload "$meta" "$DOCS_ROOT/" "$metadata_bucket" ${metadata_upload_prerelease_flag[@]+"${metadata_upload_prerelease_flag[@]}"}
Reference in New Issue
Block a user