name: Connector Performance Harness
on:
  workflow_call:
    inputs:
      connector:
        type: string
        required: true
      dataset:
        type: string
        required: true
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        type: string
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        type: string
        required: false
        default: master
      uuid:
        description: "Custom UUID of the workflow run. Used because the GitHub dispatch endpoint does not return the workflow run id."
        type: string
        required: false
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        type: string
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: string
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        type: string
        default: "true"
  workflow_dispatch:
    inputs:
      connector:
        description: "Airbyte Connector"
        type: choice
        required: true
        options:
          - connectors/source-postgres
          - connectors/source-mysql
          - connectors/source-mongodb-v2
          - connectors/destination-snowflake
        default: "connectors/source-postgres"
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        required: false
        default: master
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      uuid:
        description: "Custom UUID of the workflow run. Used because the GitHub dispatch endpoint does not return the workflow run id."
        required: false
      dataset:
        description: "Name of the dataset to use for performance measurement. Currently supports 1m, 10m, 20m."
        required: false
        default: "1m"
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: choice
        options:
          - full_refresh
          - incremental
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        default: "false"
      pr:
        description: "PR Number (Unused)"
        type: number
        required: false

jobs:
  uuid:
    name: "Custom UUID of workflow run"
    timeout-minutes: 10
    runs-on: ubuntu-latest
    steps:
      - name: UUID ${{ inputs.uuid }}
        run: true

  start-test-runner:
    name: Start Build EC2 Runner
    needs: uuid
    timeout-minutes: 10
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v3
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.gitref }}
      - name: Check PAT rate limits
        run: |
          ./tools/bin/find_non_rate_limited_PAT \
            ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
            ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
      - name: Start AWS Runner
        id: start-ec2-runner
        uses: ./.github/actions/start-aws-runner
        with:
          aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          github-token: ${{ env.PAT }}

  performance-test:
    timeout-minutes: 240
    needs: start-test-runner
    runs-on: ${{ needs.start-test-runner.outputs.label }}
    steps:
      - name: Link comment to workflow run
        if: inputs.comment-id
        uses: peter-evans/create-or-update-comment@v1
        with:
          comment-id: ${{ inputs.comment-id }}
          body: |
            #### Note: The following `dataset=` values are supported: `1m` (default), `10m`, `20m`, `bottleneck_stream1`, `bottleneck_stream_randomseed`.
            For destinations only: you can also use `stream-numbers=N` to simulate N parallel streams. Additionally, `sync-mode=incremental` is supported for destinations.
            For example: `dataset=1m stream-numbers=2 sync-mode=incremental`
            > :runner: ${{ inputs.connector }} https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
      - name: Search for valid connector name format
        id: regex
        uses: AsasInnab/regex-action@v1
        with:
          regex_pattern: "^(connectors/)?[a-zA-Z0-9-_]+$"
          regex_flags: "i" # required to be set for this plugin
          search_string: ${{ inputs.connector }}
      - name: Validate input workflow format
        if: steps.regex.outputs.first_match != inputs.connector
        run: echo "The connector provided has an invalid format!" && exit 1
      - name: Filter supported connectors
        if: "${{ inputs.connector != 'connectors/source-postgres' && inputs.connector != 'connectors/source-mysql' && inputs.connector != 'connectors/destination-snowflake' && inputs.connector != 'connectors/source-mongodb-v2' }}"
        run: echo "Only connectors/source-postgres, source-mysql, source-mongodb-v2 and destination-snowflake are currently supported by the harness" && exit 1
      - name: Checkout Airbyte
        uses: actions/checkout@v3
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.gitref }}
          fetch-depth: 0 # This is to fetch the main branch in case we are running on a different branch.
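      # Build toolchain for the performance-test job: Java for the Gradle connector and harness
      # builds, Python for the pipx-installed CI credential scripts.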
      - name: Install Java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: "21"
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install CI scripts
        run: |
          pip install pipx
          pipx ensurepath
          pipx install airbyte-ci/connectors/ci_credentials
          pipx install airbyte-ci/connectors/connector_ops
      - name: Source or Destination harness
        id: which-harness
        run: |
          the_harness="$(echo ${{ inputs.connector }} | sed 's/.*\///; s/-.*//')"-harness
          echo "harness_type=$the_harness" >> "$GITHUB_OUTPUT"
      - name: Write harness credentials
        run: |
          export PATH="$PATH:/root/.local/bin"
          ci_credentials connectors-performance/$HARNESS_TYPE write-to-storage
          connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
          ci_credentials connectors-performance/$connector_name write-to-storage
        env:
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
      - name: build harness
        shell: bash
        run: |
          echo "Building... ${{ steps.which-harness.outputs.harness_type }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          ./gradlew :airbyte-integrations:connectors-performance:$HARNESS_TYPE:build -x check
        env:
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
      - name: build connector
        shell: bash
        run: |
          echo "Building... ${{ inputs.connector }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY # this is a blank line
          connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
          echo "Running ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check"
          ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check
        env:
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
      - name: KIND Kubernetes Cluster Setup
        uses: helm/kind-action@v1.4.0
        with:
          config: "./tools/bin/${{ steps.which-harness.outputs.harness_type }}-kind-cluster-config.yaml"
      - name: Run harness
        id: run-harness
        shell: bash
        env:
          CONN: ${{ inputs.connector }}
          DS: ${{ inputs.dataset }}
          STREAM_NUMBER: ${{ inputs.stream-number }}
          SYNC_MODE: ${{ inputs.sync-mode }}
          REPORT_TO_DATADOG: ${{ inputs.report-to-datadog }}
          PREFIX: '{"type":"LOG","log":{"level":"INFO","message":"INFO i.a.i.p.PerformanceTest(runTest):165'
          SUFFIX: '"}}'
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
          DD_API_KEY: ${{ secrets.DD_API_KEY }}
        run: |
          kubectl apply -f ./tools/bin/admin-service-account.yaml
          connector_name=$(echo $CONN | cut -d / -f 2)
          kind load docker-image airbyte/$connector_name:dev --name chart-testing
          kind load docker-image airbyte/$HARNESS_TYPE:dev --name chart-testing
          # envsubst requires variables to be exported or set up in the env field of this step.
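          # Illustration (assumed template contents): run-harness-process.yaml is expected to
          # reference placeholders such as ${CONNECTOR_IMAGE_NAME}, ${DATASET} and ${HARNESS};
          # envsubst fills them in from the exports below before the manifest is piped to kubectl.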
          export CONNECTOR_IMAGE_NAME=${CONN/connectors/airbyte}:dev
          export DATASET=$DS
          export HARNESS=$HARNESS_TYPE
          envsubst < ./tools/bin/run-harness-process.yaml | kubectl create -f -
          echo "harness is ${{ steps.which-harness.outputs.harness_type }}"
          POD=$(kubectl get pod -l app=performance-harness -o jsonpath="{.items[0].metadata.name}")
          kubectl wait --for=condition=Ready --timeout=20s "pod/$POD"
          kubectl logs --follow $POD
          EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
          echo "RUN_RESULT<<$EOF" >> $GITHUB_OUTPUT
          kubectl logs --tail=1 $POD | while read line; do line=${line#"$PREFIX"}; line=${line%"$SUFFIX"}; echo $line >> $GITHUB_OUTPUT; done
          echo "$EOF" >> $GITHUB_OUTPUT
      - name: Link comment to workflow run
        if: inputs.comment-id
        uses: peter-evans/create-or-update-comment@v2
        with:
          reactions: "+1"
          comment-id: ${{ inputs.comment-id }}
          body: |
            ## Performance test Result:
            ```
            ${{ steps.run-harness.outputs.RUN_RESULT }}
            ```
  # need to add credentials here

  # In case of self-hosted EC2 errors, remove this block.
  stop-test-runner:
    name: Stop Build EC2 Runner
    timeout-minutes: 10
    needs:
      - start-test-runner # required to get output from the start-runner job
      - performance-test # required to wait when the main job is done
      - uuid
    runs-on: ubuntu-latest
    if: ${{ always() }} # required to stop the runner even if an error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-2
      - name: Checkout Airbyte
        uses: actions/checkout@v3
      - name: Check PAT rate limits
        run: |
          ./tools/bin/find_non_rate_limited_PAT \
            ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
            ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
      - name: Stop EC2 runner
        uses: supertopher/ec2-github-runner@base64v1.0.10
        with:
          mode: stop
          github-token: ${{ env.PAT }}
          label: ${{ needs.start-test-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-test-runner.outputs.ec2-instance-id }}
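# Example manual dispatch via the GitHub CLI (illustrative values only; assumes the gh CLI and
# write access to the repository):
#   gh workflow run "Connector Performance Harness" \
#     -f connector=connectors/source-postgres \
#     -f dataset=1m \
#     -f sync-mode=full_refresh \
#     -f report-to-datadog=false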