dry-run with scraping and elasticsearch (#31201)
This commit is contained in:
@@ -46,9 +46,57 @@ jobs:
|
||||
node-version: 16.15.x
|
||||
cache: npm
|
||||
|
||||
- name: Install
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Cache nextjs build
|
||||
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
|
||||
with:
|
||||
path: .next/cache
|
||||
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
|
||||
|
||||
- name: Run build scripts
|
||||
run: npm run build
|
||||
|
||||
- name: Start the server in the background
|
||||
env:
|
||||
ENABLE_DEV_LOGGING: false
|
||||
run: |
|
||||
npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &
|
||||
|
||||
# first sleep to give it a chance to start
|
||||
sleep 6
|
||||
curl --retry-connrefused --retry 4 -I http://localhost:4002/
|
||||
|
||||
- if: ${{ failure() }}
|
||||
name: Debug server outputs on errors
|
||||
run: |
|
||||
echo "____STDOUT____"
|
||||
cat /tmp/stdout.log
|
||||
echo "____STDERR____"
|
||||
cat /tmp/stderr.log
|
||||
|
||||
- name: Scrape records into a temp directory
|
||||
env:
|
||||
# If a reusable, or anything in the `data/*` directory is deleted
|
||||
# you might get a
|
||||
#
|
||||
# RenderError: Can't find the key 'site.data.reusables...' in the scope
|
||||
#
|
||||
# But that'll get fixed in the next translation pipeline. For now,
|
||||
# let's just accept an empty string instead.
|
||||
THROW_ON_EMPTY: false
|
||||
|
||||
run: |
|
||||
mkdir /tmp/records
|
||||
npm run sync-search-indices -- \
|
||||
--language en \
|
||||
--version dotcom \
|
||||
--out-directory /tmp/records \
|
||||
--no-compression --no-lunr-index
|
||||
|
||||
ls -lh /tmp/records
|
||||
|
||||
# Serves two purposes:
|
||||
# 1. Be confident that the Elasticsearch server start-up worked at all
|
||||
# 2. Sometimes Elasticsearch will bind to the port but still not
|
||||
@@ -62,8 +110,8 @@ jobs:
|
||||
ELASTICSEARCH_URL: 'http://localhost:9200'
|
||||
run: |
|
||||
./script/search/index-elasticsearch.js --verbose \
|
||||
-l en -l ja \
|
||||
-V dotcom -V ghes-3.5
|
||||
-l en \
|
||||
-V dotcom -- /tmp/records
|
||||
|
||||
- name: Show created indexes and aliases
|
||||
run: |
|
||||
|
||||
@@ -104,8 +104,7 @@ jobs:
|
||||
- name: Index into Elasticsearch
|
||||
run: |
|
||||
./script/search/index-elasticsearch.js \
|
||||
--language ${{ matrix.language }} \
|
||||
--source-directory /tmp/records
|
||||
--language ${{ matrix.language }} -- /tmp/records
|
||||
|
||||
- name: Check created indexes and aliases
|
||||
run: |
|
||||
|
||||
@@ -182,7 +182,7 @@
|
||||
"build": "next build",
|
||||
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES='en,ja' nodemon --inspect server.js",
|
||||
"dev": "cross-env npm start",
|
||||
"index-test-fixtures": "node script/search/index-elasticsearch.js -s tests/content/fixtures/search-indexes -l en -V ghae -V dotcom --index-prefix tests",
|
||||
"index-test-fixtures": "node script/search/index-elasticsearch.js -l en -V ghae -V dotcom --index-prefix tests -- tests/content/fixtures/search-indexes",
|
||||
"lint": "eslint '**/*.{js,mjs,ts,tsx}'",
|
||||
"lint-translation": "cross-env NODE_OPTIONS=--experimental-vm-modules TEST_TRANSLATION=true jest tests/linting/lint-files.js",
|
||||
"prepare": "husky install",
|
||||
|
||||
@@ -49,12 +49,10 @@ const shortNames = Object.fromEntries(
|
||||
|
||||
const allVersionKeys = Object.keys(shortNames)
|
||||
|
||||
const DEFAULT_SOURCE_DIRECTORY = path.join('lib', 'search', 'indexes')
|
||||
|
||||
program
|
||||
.description('Creates Elasticsearch index from records')
|
||||
.option('-v, --verbose', 'Verbose outputs')
|
||||
.addOption(new Option('-V, --version <VERSION...>', 'Specific versions').choices(allVersionKeys))
|
||||
.addOption(new Option('-V, --version [VERSION...]', 'Specific versions').choices(allVersionKeys))
|
||||
.addOption(
|
||||
new Option('-l, --language <LANGUAGE...>', 'Which languages to focus on').choices(languageKeys)
|
||||
)
|
||||
@@ -62,16 +60,17 @@ program
|
||||
new Option('--not-language <LANGUAGE...>', 'Specific language to omit').choices(languageKeys)
|
||||
)
|
||||
.option('-u, --elasticsearch-url <url>', 'If different from $ELASTICSEARCH_URL')
|
||||
.option(
|
||||
'-s, --source-directory <DIRECTORY>',
|
||||
`Directory where records files are (default ${DEFAULT_SOURCE_DIRECTORY})`
|
||||
)
|
||||
.option('-p, --index-prefix <prefix>', 'Index string to put before index name')
|
||||
.argument('<source-directory>', 'where the indexable files are')
|
||||
.parse(process.argv)
|
||||
|
||||
main(program.opts())
|
||||
main(program.opts(), program.args)
|
||||
|
||||
async function main(opts, args) {
|
||||
if (!args.length) {
|
||||
throw new Error('Must pass the source as the first argument')
|
||||
}
|
||||
|
||||
async function main(opts) {
|
||||
if (!opts.elasticsearchUrl && !process.env.ELASTICSEARCH_URL) {
|
||||
throw new Error(
|
||||
'Must passed the elasticsearch URL option or ' +
|
||||
@@ -103,7 +102,7 @@ async function main(opts) {
|
||||
if (verbose) {
|
||||
console.log(`Connecting to ${chalk.bold(safeUrlDisplay(node))}`)
|
||||
}
|
||||
const sourceDirectory = opts.sourceDirectory || DEFAULT_SOURCE_DIRECTORY
|
||||
const sourceDirectory = args[0]
|
||||
try {
|
||||
await fs.stat(sourceDirectory)
|
||||
} catch (error) {
|
||||
|
||||
Reference in New Issue
Block a user