airbyte/.github/workflows/connector-performance-command.yml

name: Connector Performance Harness
on:
  workflow_call:
    inputs:
      connector:
        type: string
        required: true
      dataset:
        type: string
        required: true
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        type: string
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        type: string
        required: false
        default: master
      uuid:
        description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
        type: string
        required: false
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        type: string
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: string
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        type: string
        default: "true"
  workflow_dispatch:
    inputs:
      connector:
        description: "Airbyte Connector"
        type: choice
        required: true
        options:
          - connectors/source-postgres
          - connectors/source-mysql
          - connectors/source-mongodb-v2
          - connectors/destination-snowflake
        default: "connectors/source-postgres"
      repo:
        description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
        required: false
        default: "airbytehq/airbyte"
      gitref:
        description: "The git ref to check out from the specified repository."
        required: false
        default: master
      comment-id:
        description: "The comment-id of the slash command. Used to update the comment with the status."
        required: false
      uuid:
        description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
        required: false
      dataset:
        description: "Name of dataset to use for performance measurement. Currently supports 1m, 10m, 20m."
        required: false
        default: "1m"
      stream-number:
        description: "Number of streams to use for destination performance measurement."
        required: false
        default: "1"
      sync-mode:
        description: "Sync mode to use for destination performance measurement."
        required: false
        type: choice
        options:
          - full_refresh
          - incremental
        default: "full_refresh"
      report-to-datadog:
        description: "Whether to report the performance test results to Datadog."
        required: false
        default: "false"
      pr:
        description: "PR Number (Unused)"
        type: number
        required: false

jobs:
  uuid:
    name: "Custom UUID of workflow run"
    timeout-minutes: 10
    runs-on: ubuntu-latest
    steps:
      - name: UUID ${{ inputs.uuid }}
        run: true
  start-test-runner:
    name: Start Build EC2 Runner
    needs: uuid
    timeout-minutes: 10
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Checkout Airbyte
        uses: actions/checkout@v3
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.gitref }}
      - name: Check PAT rate limits
        run: |
          ./tools/bin/find_non_rate_limited_PAT \
            ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
            ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
      - name: Start AWS Runner
        id: start-ec2-runner
        uses: ./.github/actions/start-aws-runner
        with:
          aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          github-token: ${{ env.PAT }}
  performance-test:
    timeout-minutes: 240
    needs: start-test-runner
    runs-on: ${{ needs.start-test-runner.outputs.label }}
    steps:
      - name: Link comment to workflow run
        if: inputs.comment-id
        uses: peter-evans/create-or-update-comment@v1
        with:
          comment-id: ${{ inputs.comment-id }}
          body: |
            #### Note: The following `dataset=` values are supported: `1m`<sub>(default)</sub>, `10m`, `20m`,
            `bottleneck_stream1`, `bottleneck_stream_randomseed. For destinations only: you can also use `stream-numbers=N`
            to simulate N number of parallel streams. Additionally, `sync-mode=incremental` is supported for destinations.
            For example: `dataset=1m stream-numbers=2 sync-mode=incremental`
            > :runner: ${{inputs.connector}} https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}.
      - name: Search for valid connector name format
        id: regex
        uses: AsasInnab/regex-action@v1
        with:
          regex_pattern: "^(connectors/)?[a-zA-Z0-9-_]+$"
          regex_flags: "i" # required to be set for this plugin
          search_string: ${{ inputs.connector }}
      - name: Validate input workflow format
        if: steps.regex.outputs.first_match != inputs.connector
        run: echo "The connector provided has an invalid format!" && exit 1
      - name: Filter supported connectors
        if: "${{ inputs.connector != 'connectors/source-postgres' &&
          inputs.connector != 'connectors/source-mysql' &&
          inputs.connector != 'connectors/destination-snowflake' &&
          inputs.connector != 'connectors/source-mongodb-v2' }}"
        run: echo "Only connectors/source-postgres, source-mysql, source-mongodb-v2 and destination-snowflake currently supported by harness" && exit 1
      - name: Checkout Airbyte
        uses: actions/checkout@v3
        with:
          repository: ${{ inputs.repo }}
          ref: ${{ inputs.gitref }}
          fetch-depth: 0 # This is to fetch the main branch in case we are running on a different branch.
      - name: Install Java
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: "21"
      - name: Install Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"
      - name: Install CI scripts
        run: |
          pip install pipx
          pipx ensurepath
          pipx install airbyte-ci/connectors/ci_credentials
          pipx install airbyte-ci/connectors/connector_ops
      - name: Source or Destination harness
        id: which-harness
        run: |
          the_harness="$(echo ${{inputs.connector}} | sed 's/.*\///; s/-.*//')"-harness
          echo "harness_type=$the_harness" >> "$GITHUB_OUTPUT"
      - name: Write harness credentials
        run: |
          export PATH="$PATH:/root/.local/bin"
          ci_credentials connectors-performance/$HARNESS_TYPE write-to-storage
          connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
          ci_credentials connectors-performance/$connector_name write-to-storage
        env:
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
      - name: build harness
        shell: bash
        run: |
          echo "Building... ${{ steps.which-harness.outputs.harness_type }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          ./gradlew :airbyte-integrations:connectors-performance:$HARNESS_TYPE:build -x check
        env:
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
      - name: build connector
        shell: bash
        run: |
          echo "Building... ${{inputs.connector}}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY # this is a blank line
          connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
          echo "Running ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check"
          ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check
        env:
          GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
      - name: KIND Kubernetes Cluster Setup
        uses: helm/kind-action@v1.4.0
        with:
          config: "./tools/bin/${{ steps.which-harness.outputs.harness_type }}-kind-cluster-config.yaml"
      - name: Run harness
        id: run-harness
        shell: bash
        env:
          CONN: ${{ inputs.connector }}
          DS: ${{ inputs.dataset }}
          STREAM_NUMBER: ${{ inputs.stream-number }}
          SYNC_MODE: ${{ inputs.sync-mode }}
          REPORT_TO_DATADOG: ${{ inputs.report-to-datadog }}
          PREFIX: '{"type":"LOG","log":{"level":"INFO","message":"INFO i.a.i.p.PerformanceTest(runTest):165'
          SUFFIX: '"}}'
          HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
          DD_API_KEY: ${{ secrets.DD_API_KEY }}
        run: |
          kubectl apply -f ./tools/bin/admin-service-account.yaml
          connector_name=$(echo $CONN | cut -d / -f 2)
          kind load docker-image airbyte/$connector_name:dev --name chart-testing
          kind load docker-image airbyte/$HARNESS_TYPE:dev --name chart-testing
          # envsubst requires variables to be exported or setup in the env field in this step.
          export CONNECTOR_IMAGE_NAME=${CONN/connectors/airbyte}:dev
          export DATASET=$DS
          export HARNESS=$HARNESS_TYPE
          envsubst < ./tools/bin/run-harness-process.yaml | kubectl create -f -
          echo "harness is ${{ steps.which-harness.outputs.harness_type }}"
          POD=$(kubectl get pod -l app=performance-harness -o jsonpath="{.items[0].metadata.name}")
          kubectl wait --for=condition=Ready --timeout=20s "pod/$POD"
          kubectl logs --follow $POD
          EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
          echo "RUN_RESULT<<$EOF" >> $GITHUB_OUTPUT
          kubectl logs --tail=1 $POD | while read line ; do line=${line#"$PREFIX"}; line=${line%"$SUFFIX"}; echo $line >> $GITHUB_OUTPUT ; done
          echo "$EOF" >> $GITHUB_OUTPUT
      - name: Link comment to workflow run
        if: inputs.comment-id
        uses: peter-evans/create-or-update-comment@v2
        with:
          reactions: "+1"
          comment-id: ${{ inputs.comment-id }}
          body: |
            ## Performance test Result:
            ```
            ${{ steps.run-harness.outputs.RUN_RESULT }}
            ```
      # need to add credentials here
  # In case of self-hosted EC2 errors, remove this block.
  stop-test-runner:
    name: Stop Build EC2 Runner
    timeout-minutes: 10
    needs:
      - start-test-runner # required to get output from the start-runner job
      - performance-test # required to wait when the main job is done
      - uuid
    runs-on: ubuntu-latest
    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
          aws-region: us-east-2
      - name: Checkout Airbyte
        uses: actions/checkout@v3
      - name: Check PAT rate limits
        run: |
          ./tools/bin/find_non_rate_limited_PAT \
            ${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
            ${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
      - name: Stop EC2 runner
        uses: supertopher/ec2-github-runner@base64v1.0.10
        with:
          mode: stop
          github-token: ${{ env.PAT }}
          label: ${{ needs.start-test-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-test-runner.outputs.ec2-instance-id }}