# Connector Performance Harness
#
# Runs performance measurements for a selected Airbyte connector against a
# named dataset on a self-hosted EC2 runner. Invoked either from another
# workflow (workflow_call) or manually / via slash command (workflow_dispatch).
name: Connector Performance Harness

on:
  workflow_call:
    inputs:
      connector:
        type: string
        required: true
      dataset:
        type: string
        required: true
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        type: string
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        type: string
        required: false
        default: "master"
      uuid:
        description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
        type: string
        required: false
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        type: string
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: string
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        type: string
        default: "true"
  workflow_dispatch:
    inputs:
      connector:
        description: "Airbyte Connector"
        type: choice
        required: true
        options:
          - connectors/source-postgres
          - connectors/source-mysql
          - connectors/source-mongodb-v2
          - connectors/destination-snowflake
        default: "connectors/source-postgres"
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        required: false
        default: "master"
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      uuid:
        description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
        required: false
      dataset:
        description: "Name of dataset to use for performance measurement. Currently supports 1m, 10m, 20m."
        required: false
        default: "1m"
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: choice
        options:
          - full_refresh
          - incremental
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        default: "false"
      pr:
        description: "PR Number (Unused)"
        type: number
        required: false
jobs:
  # Echoes the caller-supplied UUID so the dispatching side can locate this
  # run (the dispatch endpoint does not return a workflow run id).
  uuid:
    name: "Custom UUID of workflow run"
    timeout-minutes: 10
    runs-on: ubuntu-latest
    steps:
      - name: UUID ${{ inputs.uuid }}
        run: true
start-test-runner:
|
|
name: Start Build EC2 Runner
|
|
needs: uuid
|
|
timeout-minutes: 10
|
|
runs-on: ubuntu-latest
|
|
outputs:
|
|
label: ${{ steps.start-ec2-runner.outputs.label }}
|
|
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
|
|
steps:
|
|
- name: Checkout Airbyte
|
|
uses: actions/checkout@v3
|
|
with:
|
|
repository: ${{ inputs.repo }}
|
|
ref: ${{ inputs.gitref }}
|
|
- name: Check PAT rate limits
|
|
run: |
|
|
./tools/bin/find_non_rate_limited_PAT \
|
|
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
|
|
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
|
|
- name: Start AWS Runner
|
|
id: start-ec2-runner
|
|
uses: ./.github/actions/start-aws-runner
|
|
with:
|
|
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
|
|
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
|
|
github-token: ${{ env.PAT }}
|
|
performance-test:
|
|
timeout-minutes: 240
|
|
needs: start-test-runner
|
|
runs-on: ${{ needs.start-test-runner.outputs.label }}
|
|
steps:
|
|
- name: Link comment to workflow run
|
|
if: inputs.comment-id
|
|
uses: peter-evans/create-or-update-comment@v1
|
|
with:
|
|
comment-id: ${{ inputs.comment-id }}
|
|
body: |
|
|
#### Note: The following `dataset=` values are supported: `1m`<sub>(default)</sub>, `10m`, `20m`,
|
|
`bottleneck_stream1`, `bottleneck_stream_randomseed. For destinations only: you can also use `stream-numbers=N`
|
|
to simulate N number of parallel streams. Additionally, `sync-mode=incremental` is supported for destinations.
|
|
For example: `dataset=1m stream-numbers=2 sync-mode=incremental`
|
|
> :runner: ${{inputs.connector}} https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}.
|
|
- name: Search for valid connector name format
|
|
id: regex
|
|
uses: AsasInnab/regex-action@v1
|
|
with:
|
|
regex_pattern: "^(connectors/)?[a-zA-Z0-9-_]+$"
|
|
regex_flags: "i" # required to be set for this plugin
|
|
search_string: ${{ inputs.connector }}
|
|
- name: Validate input workflow format
|
|
if: steps.regex.outputs.first_match != inputs.connector
|
|
run: echo "The connector provided has an invalid format!" && exit 1
|
|
- name: Filter supported connectors
|
|
if: "${{ inputs.connector != 'connectors/source-postgres' &&
|
|
inputs.connector != 'connectors/source-mysql' &&
|
|
inputs.connector != 'connectors/destination-snowflake' &&
|
|
inputs.connector != 'connectors/source-mongodb-v2' }}"
|
|
run: echo "Only connectors/source-postgres, source-mysql, source-mongodb-v2 and destination-snowflake currently supported by harness" && exit 1
|
|
- name: Checkout Airbyte
|
|
uses: actions/checkout@v3
|
|
with:
|
|
repository: ${{ inputs.repo }}
|
|
ref: ${{ inputs.gitref }}
|
|
fetch-depth: 0 # This is to fetch the main branch in case we are running on a different branch.
|
|
- name: Install Java
|
|
uses: actions/setup-java@v3
|
|
with:
|
|
distribution: "zulu"
|
|
java-version: "21"
|
|
- name: Install Python
|
|
uses: actions/setup-python@v4
|
|
with:
|
|
python-version: "3.10"
|
|
- name: Install CI scripts
|
|
run: |
|
|
pip install pipx
|
|
pipx ensurepath
|
|
pipx install airbyte-ci/connectors/ci_credentials
|
|
pipx install airbyte-ci/connectors/connector_ops
|
|
- name: Source or Destination harness
|
|
id: which-harness
|
|
run: |
|
|
the_harness="$(echo ${{inputs.connector}} | sed 's/.*\///; s/-.*//')"-harness
|
|
echo "harness_type=$the_harness" >> "$GITHUB_OUTPUT"
|
|
- name: Write harness credentials
|
|
run: |
|
|
export PATH="$PATH:/root/.local/bin"
|
|
ci_credentials connectors-performance/$HARNESS_TYPE write-to-storage
|
|
connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
|
|
ci_credentials connectors-performance/$connector_name write-to-storage
|
|
env:
|
|
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
|
|
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
|
|
- name: build harness
|
|
shell: bash
|
|
run: |
|
|
echo "Building... ${{ steps.which-harness.outputs.harness_type }}" >> $GITHUB_STEP_SUMMARY
|
|
echo "" >> $GITHUB_STEP_SUMMARY
|
|
./gradlew :airbyte-integrations:connectors-performance:$HARNESS_TYPE:build -x check
|
|
env:
|
|
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
|
|
- name: build connector
|
|
shell: bash
|
|
run: |
|
|
echo "Building... ${{inputs.connector}}" >> $GITHUB_STEP_SUMMARY
|
|
echo "" >> $GITHUB_STEP_SUMMARY # this is a blank line
|
|
connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
|
|
echo "Running ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check"
|
|
./gradlew :airbyte-integrations:connectors:$connector_name:build -x check
|
|
env:
|
|
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
|
|
- name: KIND Kubernetes Cluster Setup
|
|
uses: helm/kind-action@v1.4.0
|
|
with:
|
|
config: "./tools/bin/${{ steps.which-harness.outputs.harness_type }}-kind-cluster-config.yaml"
|
|
- name: Run harness
|
|
id: run-harness
|
|
shell: bash
|
|
env:
|
|
CONN: ${{ inputs.connector }}
|
|
DS: ${{ inputs.dataset }}
|
|
STREAM_NUMBER: ${{ inputs.stream-number }}
|
|
SYNC_MODE: ${{ inputs.sync-mode }}
|
|
REPORT_TO_DATADOG: ${{ inputs.report-to-datadog }}
|
|
PREFIX: '{"type":"LOG","log":{"level":"INFO","message":"INFO i.a.i.p.PerformanceTest(runTest):165'
|
|
SUFFIX: '"}}'
|
|
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
|
|
DD_API_KEY: ${{ secrets.DD_API_KEY }}
|
|
run: |
|
|
kubectl apply -f ./tools/bin/admin-service-account.yaml
|
|
connector_name=$(echo $CONN | cut -d / -f 2)
|
|
kind load docker-image airbyte/$connector_name:dev --name chart-testing
|
|
kind load docker-image airbyte/$HARNESS_TYPE:dev --name chart-testing
|
|
# envsubst requires variables to be exported or setup in the env field in this step.
|
|
export CONNECTOR_IMAGE_NAME=${CONN/connectors/airbyte}:dev
|
|
export DATASET=$DS
|
|
export HARNESS=$HARNESS_TYPE
|
|
envsubst < ./tools/bin/run-harness-process.yaml | kubectl create -f -
|
|
echo "harness is ${{ steps.which-harness.outputs.harness_type }}"
|
|
POD=$(kubectl get pod -l app=performance-harness -o jsonpath="{.items[0].metadata.name}")
|
|
kubectl wait --for=condition=Ready --timeout=20s "pod/$POD"
|
|
kubectl logs --follow $POD
|
|
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
|
|
echo "RUN_RESULT<<$EOF" >> $GITHUB_OUTPUT
|
|
kubectl logs --tail=1 $POD | while read line ; do line=${line#"$PREFIX"}; line=${line%"$SUFFIX"}; echo $line >> $GITHUB_OUTPUT ; done
|
|
echo "$EOF" >> $GITHUB_OUTPUT
|
|
- name: Link comment to workflow run
|
|
if: inputs.comment-id
|
|
uses: peter-evans/create-or-update-comment@v2
|
|
with:
|
|
reactions: "+1"
|
|
comment-id: ${{ inputs.comment-id }}
|
|
body: |
|
|
## Performance test Result:
|
|
```
|
|
${{ steps.run-harness.outputs.RUN_RESULT }}
|
|
```
|
|
# need to add credentials here
|
|
# In case of self-hosted EC2 errors, remove this block.
|
|
stop-test-runner:
|
|
name: Stop Build EC2 Runner
|
|
timeout-minutes: 10
|
|
needs:
|
|
- start-test-runner # required to get output from the start-runner job
|
|
- performance-test # required to wait when the main job is done
|
|
- uuid
|
|
runs-on: ubuntu-latest
|
|
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
|
|
steps:
|
|
- name: Configure AWS credentials
|
|
uses: aws-actions/configure-aws-credentials@v1
|
|
with:
|
|
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
|
|
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
|
|
aws-region: us-east-2
|
|
- name: Checkout Airbyte
|
|
uses: actions/checkout@v3
|
|
- name: Check PAT rate limits
|
|
run: |
|
|
./tools/bin/find_non_rate_limited_PAT \
|
|
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
|
|
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
|
|
- name: Stop EC2 runner
|
|
uses: supertopher/ec2-github-runner@base64v1.0.10
|
|
with:
|
|
mode: stop
|
|
github-token: ${{ env.PAT }}
|
|
label: ${{ needs.start-test-runner.outputs.label }}
|
|
ec2-instance-id: ${{ needs.start-test-runner.outputs.ec2-instance-id }}
|