name: Sync search Elasticsearch

# **What it does**: It scrapes the whole site and dumps the records in a
# temp directory. Then it indexes those records into Elasticsearch.
# **Why we have it**: We want our search indexes kept up to date.
# **Who does it impact**: Anyone using search on docs.

on:
  # Manual runs may narrow the work to a single version and/or a subset of
  # languages; both inputs are optional and default to the empty string.
  workflow_dispatch:
    inputs:
      version:
        description: "Version to exclusively generate the search index for. E.g. 'dotcom', 'ghes-3.7', 'ghae'"
        required: false
        default: ""
      languages:
        description: "Comma separated languages. E.g. 'en,ja, es' (defaults to all)"
        required: false
        default: ""
  schedule:
    # `*/24` in the hour field only ever matches hour 0, so this fires once
    # a day, at 20 minutes past that hour.
    - cron: "20 */24 * * *"
  # Also triggered every time the production deploy workflow completes.
  workflow_run:
    workflows:
      - "Azure Production - Build and Deploy"
    types: [completed]

# The workflow's token only needs to read repository contents.
permissions:
  contents: read

# Runs are grouped by workflow name, head ref and triggering event; a newly
# queued run in a group cancels the run of that group still in progress.
concurrency:
  group: "${{ github.workflow }} @ ${{ github.head_ref }} ${{ github.event_name }}"
  cancel-in-progress: true

env:
  FREEZE: ${{ secrets.FREEZE }}
  ELASTICSEARCH_URL: ${{ secrets.ELASTICSEARCH_URL }}

  # Every matrix job handles exactly one language and checks that language's
  # repo out into `translation`, so all of the per-language roots can point
  # at that one directory.
  TRANSLATIONS_ROOT_DE_DE: translation
  TRANSLATIONS_ROOT_ES_ES: translation
  TRANSLATIONS_ROOT_FR_FR: translation
  TRANSLATIONS_ROOT_JA_JP: translation
  TRANSLATIONS_ROOT_KO_KR: translation
  TRANSLATIONS_ROOT_PT_BR: translation
  TRANSLATIONS_ROOT_RU_RU: translation
  TRANSLATIONS_ROOT_ZH_CN: translation

jobs:
  # Decides which languages the indexing job fans out over, based on the
  # event that triggered this run. The list is exposed as the `matrix` output.
  figureOutMatrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.result }}
    steps:
      - uses: actions/github-script@d556feaca394842dc55e4734bf3bb9f685482fa0
        id: set-matrix
        with:
          script: |
            // Edit this list for the definitive list of languages
            // (other than English) we want to index in Elasticsearch.
            const allNonEnglish = ["ja", "es", "pt", "cn", "ru", "fr", "ko", "de"]
            const allPossible = ["en", ...allNonEnglish]

            // After a deploy: index English only, and only if the deploy
            // actually succeeded.
            if (context.eventName === "workflow_run") {
              if (context.payload.workflow_run.conclusion === "success") {
                return ["en"]
              }
              throw new Error(`It was a workflow_run but not success ('${context.payload.workflow_run.conclusion}')`)
            }

            // Manual run: honor the `languages` input, otherwise do them all.
            if (context.eventName === "workflow_dispatch") {
              if (context.payload.inputs.languages) {
                const clean = context.payload.inputs.languages.split(',').map(x => x.trim()).filter(Boolean)
                // Input such as "," or " " leaves nothing after cleaning.
                // Treat that like an unset input instead of emitting an
                // empty list, which is not a usable matrix.
                if (!clean.length) {
                  return allPossible
                }
                const notRecognized = clean.find(x => !allPossible.includes(x))
                if (notRecognized) {
                  throw new Error(`'${notRecognized}' is not a recognized language code`)
                }
                return clean
              }
              return allPossible
            }

            // Scheduled run: every language except English.
            if (context.eventName === "schedule") {
              return allNonEnglish
            }

            console.log(context)
            throw new Error(`Unable figure out what languages to run (${context.eventName})`)

      - name: Debug output
        env:
          # Passed through the environment because the JSON result contains
          # double quotes, which would otherwise terminate the shell string
          # and be stripped from the printed value.
          MATRIX: ${{ steps.set-matrix.outputs.result }}
        run: echo "$MATRIX"

  # Scrapes the site and indexes the records into Elasticsearch, one matrix
  # job per language chosen by `figureOutMatrix`.
  updateElasticsearchIndexes:
    needs: figureOutMatrix
    name: Update indexes
    if: ${{ github.repository == 'github/docs-internal' }}
    runs-on: ubuntu-20.04-xl
    strategy:
      fail-fast: false
      max-parallel: 3
      matrix:
        language: ${{ fromJSON(needs.figureOutMatrix.outputs.matrix) }}
    steps:
      - if: ${{ env.FREEZE == 'true' }}
        run: |
          echo 'The repo is currently frozen! Exiting this workflow.'
          exit 1 # prevents further steps from running

      - name: Check out repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      # Translations live in one repo per language; the short language code
      # is mapped to the repo-name suffix and checked out into `translation`.
      - name: Checkout the non-English repo
        if: ${{ matrix.language != 'en' }}
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: github/docs-internal.${{ fromJSON('{"cn":"zh-cn","es":"es-es","ru":"ru-ru","ja":"ja-jp","pt":"pt-br","de":"de-de","fr":"fr-fr","ko":"ko-kr"}')[matrix.language] }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: translation

      - name: Setup Node
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Cache nextjs build
        uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
        with:
          path: .next/cache
          key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}

      - name: Run build scripts
        run: npm run build

      - name: Start the server in the background
        env:
          ENABLE_DEV_LOGGING: false
        run: |
          npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &

          # first sleep to give it a chance to start
          sleep 6
          curl --retry-connrefused --retry 4 -I http://localhost:4002/

      - if: ${{ failure() }}
        name: Debug server outputs on errors
        run: |
          echo "____STDOUT____"
          cat /tmp/stdout.log
          echo "____STDERR____"
          cat /tmp/stderr.log

      - name: Scrape records into a temp directory
        env:
          # If a reusable, or anything in the `data/*` directory is deleted
          # you might get a
          #
          #   RenderError: Can't find the key 'site.data.reusables...' in the scope
          #
          # But that'll get fixed in the next translation pipeline. For now,
          # let's just accept an empty string instead.
          THROW_ON_EMPTY: false

          # Note that by default, this is '' (empty string) and that means
          # the same as not set within the script.
          VERSION: ${{ github.event.inputs.version }}

        run: |
          mkdir /tmp/records
          npm run sync-search-indices -- /tmp/records \
            --language ${{ matrix.language }}

          ls -lh /tmp/records

      - name: Check that Elasticsearch is accessible
        run: |
          # ELASTICSEARCH_URL comes from the workflow-level `env`.
          curl --fail --retry-connrefused --retry 5 -I "$ELASTICSEARCH_URL"

      - name: Index into Elasticsearch
        env:
          # Must match what we used when scraping (npm run sync-search-indices)
          # otherwise the script will seek other versions from disk that might
          # not exist.
          VERSION: ${{ github.event.inputs.version }}
        run: |
          ./script/search/index-elasticsearch.js /tmp/records \
            --language ${{ matrix.language }}

      - name: Check created indexes and aliases
        run: |
          # Not using `--fail` here because I've observed that it can fail
          # with a rather cryptic 404 error when it should, if anything, be
          # a 200 OK with a list of no indices.
          # The URLs are quoted so the shell never treats `?` as a glob.
          curl --retry-connrefused --retry 5 "$ELASTICSEARCH_URL/_cat/indices?v"
          curl --retry-connrefused --retry 5 "$ELASTICSEARCH_URL/_cat/aliases?v"

      - name: Purge Fastly edge cache
        env:
          FASTLY_TOKEN: ${{ secrets.FASTLY_TOKEN }}
          FASTLY_SERVICE_ID: ${{ secrets.FASTLY_SERVICE_ID }}
          FASTLY_SURROGATE_KEY: api-search:${{ matrix.language }}
        run: .github/actions-scripts/purge-fastly-edge-cache.js

      - name: Send Slack notification if workflow fails
        uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
        if: failure()
        with:
          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
          color: failure
          text: The last 'Sync search Elasticsearch' run failed. See https://github.com/${{github.repository}}/actions/workflows/sync-search-elasticsearch.yml?query=workflow%3A%22sync+search%22