From ff533e4ea344a0d50e7f677b6f0048c54416f21b Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Tue, 27 Sep 2022 22:41:21 +0200 Subject: [PATCH 1/2] be more explicit about potential alias updates (#31214) --- script/search/index-elasticsearch.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/search/index-elasticsearch.js b/script/search/index-elasticsearch.js index e87a3b6c4c..c117a2df94 100755 --- a/script/search/index-elasticsearch.js +++ b/script/search/index-elasticsearch.js @@ -336,7 +336,7 @@ async function indexVersion( console.log('Deleting index', index.index) } } - + console.log('Updating alias actions:', aliasUpdates) await client.indices.updateAliases({ body: { actions: aliasUpdates } }) } From db52a7e8bd038c9719aa6d394d10d8f152364c56 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Tue, 27 Sep 2022 22:59:33 +0200 Subject: [PATCH 2/2] dry-run with scraping and elasticsearch (#31201) --- .../dry-run-elasticsearch-indexing.yml | 54 +++++++++++++++++-- .../workflows/sync-search-elasticsearch.yml | 3 +- package.json | 2 +- script/search/index-elasticsearch.js | 19 ++++--- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/.github/workflows/dry-run-elasticsearch-indexing.yml b/.github/workflows/dry-run-elasticsearch-indexing.yml index e384b97c2b..0cd2fbcca1 100644 --- a/.github/workflows/dry-run-elasticsearch-indexing.yml +++ b/.github/workflows/dry-run-elasticsearch-indexing.yml @@ -46,9 +46,57 @@ jobs: node-version: 16.15.x cache: npm - - name: Install + - name: Install dependencies run: npm ci + - name: Cache nextjs build + uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09 + with: + path: .next/cache + key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }} + + - name: Run build scripts + run: npm run build + + - name: Start the server in the background + env: + ENABLE_DEV_LOGGING: false + run: | + npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log & + + # first sleep to give it a chance to start + sleep 6 + curl --retry-connrefused --retry 4 -I http://localhost:4002/ + + - if: ${{ failure() }} + name: Debug server outputs on errors + run: | + echo "____STDOUT____" + cat /tmp/stdout.log + echo "____STDERR____" + cat /tmp/stderr.log + + - name: Scrape records into a temp directory + env: + # If a reusable, or anything in the `data/*` directory is deleted + # you might get a + # + # RenderError: Can't find the key 'site.data.reusables...' in the scope + # + # But that'll get fixed in the next translation pipeline. For now, + # let's just accept an empty string instead. + THROW_ON_EMPTY: false + + run: | + mkdir /tmp/records + npm run sync-search-indices -- \ + --language en \ + --version dotcom \ + --out-directory /tmp/records \ + --no-compression --no-lunr-index + + ls -lh /tmp/records + # Serves two purposes; # 1. Be confident that the Elasticsearch server start-up worked at all # 2. Sometimes Elasticsearch will bind to the port but still not @@ -62,8 +110,8 @@ jobs: ELASTICSEARCH_URL: 'http://localhost:9200' run: | ./script/search/index-elasticsearch.js --verbose \ - -l en -l ja \ - -V dotcom -V ghes-3.5 + -l en \ + -V dotcom -- /tmp/records - name: Show created indexes and aliases run: | diff --git a/.github/workflows/sync-search-elasticsearch.yml b/.github/workflows/sync-search-elasticsearch.yml index bd71159cb5..b5e4efba19 100644 --- a/.github/workflows/sync-search-elasticsearch.yml +++ b/.github/workflows/sync-search-elasticsearch.yml @@ -104,8 +104,7 @@ jobs: - name: Index into Elasticsearch run: | ./script/search/index-elasticsearch.js \ - --language ${{ matrix.language }} \ - --source-directory /tmp/records + --language ${{ matrix.language }} -- /tmp/records - name: Check created indexes and aliases run: | diff --git a/package.json b/package.json index b48bb6bec9..8e0be22439 100644 --- a/package.json +++ b/package.json @@ -182,7 +182,7 @@ "build": "next build", "debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES='en,ja' nodemon --inspect server.js", "dev": "cross-env npm start", - "index-test-fixtures": "node script/search/index-elasticsearch.js -s tests/content/fixtures/search-indexes -l en -V ghae -V dotcom --index-prefix tests", + "index-test-fixtures": "node script/search/index-elasticsearch.js -l en -V ghae -V dotcom --index-prefix tests -- tests/content/fixtures/search-indexes", "lint": "eslint '**/*.{js,mjs,ts,tsx}'", "lint-translation": "cross-env NODE_OPTIONS=--experimental-vm-modules TEST_TRANSLATION=true jest tests/linting/lint-files.js", "prepare": "husky install", diff --git a/script/search/index-elasticsearch.js b/script/search/index-elasticsearch.js index c117a2df94..2a06183980 100755 --- a/script/search/index-elasticsearch.js +++ b/script/search/index-elasticsearch.js @@ -49,12 +49,10 @@ const shortNames = Object.fromEntries( const allVersionKeys = Object.keys(shortNames) -const DEFAULT_SOURCE_DIRECTORY = path.join('lib', 'search', 'indexes') - program .description('Creates Elasticsearch index from records') .option('-v, --verbose', 'Verbose outputs') - .addOption(new Option('-V, --version ', 'Specific versions').choices(allVersionKeys)) + .addOption(new Option('-V, --version [VERSION...]', 'Specific versions').choices(allVersionKeys)) .addOption( new Option('-l, --language ', 'Which languages to focus on').choices(languageKeys) ) @@ -62,16 +60,17 @@ program new Option('--not-language ', 'Specific language to omit').choices(languageKeys) ) .option('-u, --elasticsearch-url ', 'If different from $ELASTICSEARCH_URL') - .option( - '-s, --source-directory ', - `Directory where records files are (default ${DEFAULT_SOURCE_DIRECTORY})` - ) .option('-p, --index-prefix ', 'Index string to put before index name') + .argument('', 'where the indexable files are') .parse(process.argv) -main(program.opts()) +main(program.opts(), program.args) + +async function main(opts, args) { + if (!args.length) { + throw new Error('Must pass the source as the first argument') + } -async function main(opts) { if (!opts.elasticsearchUrl && !process.env.ELASTICSEARCH_URL) { throw new Error( 'Must passed the elasticsearch URL option or ' + @@ -103,7 +102,7 @@ async function main(opts) { if (verbose) { console.log(`Connecting to ${chalk.bold(safeUrlDisplay(node))}`) } - const sourceDirectory = opts.sourceDirectory || DEFAULT_SOURCE_DIRECTORY + const sourceDirectory = args[0] try { await fs.stat(sourceDirectory) } catch (error) {