name: Sync search - PR

# **What it does**: This does what `sync-search-elasticsearch.yml` does but
# with a localhost Elasticsearch and only for English.
# **Why we have it**: To test that the script works and the popular pages json is valid.
# **Who does it impact**: Docs engineering.

on:
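  # Only trigger for PRs that touch the search scraping code, the popular-pages
  # data, or this workflow itself.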
  pull_request:
    paths:
      - script/search/parse-page-sections-into-records.js
      - script/search/popular-pages.js
      - lib/search/popular-pages.json
      # Ultimately, for debugging this workflow itself
      - .github/workflows/sync-search-pr.yml

permissions:
  contents: read

# This allows a subsequently queued workflow run to interrupt previous runs
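# The group key falls back from the PR head label to head_ref to ref, so runs
# for the same PR or branch share one group and only the newest run survives.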
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
  cancel-in-progress: true

env:
  # Yes, it's hardcoded but it makes all the steps look exactly the same
  # as they do in `sync-search-elasticsearch.yml` where it uses
  # that `${{ env.ELASTICSEARCH_URL }}`
  ELASTICSEARCH_URL: http://localhost:9200

jobs:
  lint:
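    # The repository comparison is used as an array index into the fromJSON
    # list (false -> ubuntu-latest, true -> ubuntu-20.04-xl), so only the
    # private docs-internal repo gets the larger runner.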
    runs-on: ${{ fromJSON('["ubuntu-latest", "ubuntu-20.04-xl"]')[github.repository == 'github/docs-internal'] }}
    if: github.repository == 'github/docs-internal' || github.repository == 'github/docs'
    steps:
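      # This third-party action boots a disposable single-node Elasticsearch
      # container for the job, published on localhost:9200 to match
      # ELASTICSEARCH_URL above.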
      - uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
        with:
          # Make sure this matches production and `test.yml`
          elasticsearch version: '7.11.1'
          host port: 9200
          container port: 9200
          host node port: 9300
          node port: 9300
          discovery type: 'single-node'

      - name: Check out repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      - name: Setup node
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'
          cache: npm

      - name: Install dependencies
        run: npm ci

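      # Restoring .next/cache (keyed on the package manifests) lets the
      # `npm run build` step below reuse previous Next.js build output.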
      - name: Cache nextjs build
        uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
        with:
          path: .next/cache
          key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}

      - name: Build
        run: npm run build

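      # The server is started in the background, so the curl below (with
      # --retry-connrefused) effectively polls port 4002 until the app responds.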
      - name: Start the server in the background
        env:
          ENABLE_DEV_LOGGING: false
        run: |
          npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &

          # first sleep to give it a chance to start
          sleep 6
          curl --retry-connrefused --retry 4 -I http://localhost:4002/

      - if: ${{ failure() }}
        name: Debug server outputs on errors
        run: |
          echo "____STDOUT____"
          cat /tmp/stdout.log
          echo "____STDERR____"
          cat /tmp/stderr.log

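      # Presumably --no-compression and --no-lunr-index keep the scraped output
      # as plain, uncompressed JSON record files that the Elasticsearch indexing
      # step below reads straight from /tmp/records.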
      - name: Scrape records into a temp directory
        env:
          # If a reusable, or anything in the `data/*` directory is deleted
          # you might get a
          #
          #   RenderError: Can't find the key 'site.data.reusables...' in the scope
          #
          # But that'll get fixed in the next translation pipeline. For now,
          # let's just accept an empty string instead.
          THROW_ON_EMPTY: false

        run: |
          mkdir /tmp/records
          npm run sync-search-indices -- \
            --language en \
            --version dotcom \
            --out-directory /tmp/records \
            --no-compression --no-lunr-index

          ls -lh /tmp/records

      - name: Check that Elasticsearch is accessible
        run: |
          curl --fail --retry-connrefused --retry 5 -I ${{ env.ELASTICSEARCH_URL }}

      - name: Index into Elasticsearch
        run: |
          ./script/search/index-elasticsearch.js \
            --language en \
            --version dotcom -- /tmp/records

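      # _cat/indices and _cat/aliases are human-readable Elasticsearch listings
      # (?v adds column headers); this just surfaces what was created.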
      - name: Check created indexes and aliases
        run: |
          curl --fail --retry-connrefused --retry 5 ${{ env.ELASTICSEARCH_URL }}/_cat/indices?v
          curl --fail --retry-connrefused --retry 5 ${{ env.ELASTICSEARCH_URL }}/_cat/aliases?v