remove lunr from codebase (#32432)
This commit is contained in:
119
.github/workflows/dry-run-elasticsearch-indexing.yml
vendored
119
.github/workflows/dry-run-elasticsearch-indexing.yml
vendored
@@ -1,119 +0,0 @@
|
|||||||
name: Dry run Elasticsearch indexing
|
|
||||||
|
|
||||||
# **What it does**: Tests indexing records into a local Elasticsearch
|
|
||||||
# **Why we have it**: To make sure the indexing code works.
|
|
||||||
# **Who does it impact**: Docs engineering.
|
|
||||||
|
|
||||||
on:
|
|
||||||
merge_group:
|
|
||||||
pull_request:
|
|
||||||
paths:
|
|
||||||
- 'script/search/**'
|
|
||||||
- 'package*.json'
|
|
||||||
- .github/workflows/dry-run-elasticsearch-indexing.yml
|
|
||||||
|
|
||||||
permissions:
|
|
||||||
contents: read
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
dry-run-elasticsearch-indexing:
|
|
||||||
# Avoid github/docs and forks of it
|
|
||||||
if: github.repository == 'github/docs-internal'
|
|
||||||
|
|
||||||
runs-on: ubuntu-20.04-xl
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
|
|
||||||
with:
|
|
||||||
elasticsearch version: '8.2.0'
|
|
||||||
host port: 9200
|
|
||||||
container port: 9200
|
|
||||||
host node port: 9300
|
|
||||||
node port: 9300
|
|
||||||
discovery type: 'single-node'
|
|
||||||
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
|
|
||||||
with:
|
|
||||||
lfs: 'true'
|
|
||||||
|
|
||||||
- name: Check out LFS objects
|
|
||||||
run: git lfs checkout
|
|
||||||
|
|
||||||
- name: Setup node
|
|
||||||
uses: actions/setup-node@1f8c6b94b26d0feae1e387ca63ccbdc44d27b561
|
|
||||||
with:
|
|
||||||
node-version: 16.15.x
|
|
||||||
cache: npm
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: npm ci
|
|
||||||
|
|
||||||
- name: Cache nextjs build
|
|
||||||
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
|
|
||||||
with:
|
|
||||||
path: .next/cache
|
|
||||||
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
|
|
||||||
|
|
||||||
- name: Run build scripts
|
|
||||||
run: npm run build
|
|
||||||
|
|
||||||
- name: Start the server in the background
|
|
||||||
env:
|
|
||||||
ENABLE_DEV_LOGGING: false
|
|
||||||
run: |
|
|
||||||
npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &
|
|
||||||
|
|
||||||
# first sleep to give it a chance to start
|
|
||||||
sleep 6
|
|
||||||
curl --retry-connrefused --retry 4 -I http://localhost:4002/
|
|
||||||
|
|
||||||
- if: ${{ failure() }}
|
|
||||||
name: Debug server outputs on errors
|
|
||||||
run: |
|
|
||||||
echo "____STDOUT____"
|
|
||||||
cat /tmp/stdout.log
|
|
||||||
echo "____STDERR____"
|
|
||||||
cat /tmp/stderr.log
|
|
||||||
|
|
||||||
- name: Scrape records into a temp directory
|
|
||||||
env:
|
|
||||||
# If a reusable, or anything in the `data/*` directory is deleted
|
|
||||||
# you might get a
|
|
||||||
#
|
|
||||||
# RenderError: Can't find the key 'site.data.reusables...' in the scope
|
|
||||||
#
|
|
||||||
# But that'll get fixed in the next translation pipeline. For now,
|
|
||||||
# let's just accept an empty string instead.
|
|
||||||
THROW_ON_EMPTY: false
|
|
||||||
|
|
||||||
run: |
|
|
||||||
mkdir /tmp/records
|
|
||||||
npm run sync-search-indices -- \
|
|
||||||
--language en \
|
|
||||||
--version dotcom \
|
|
||||||
--out-directory /tmp/records \
|
|
||||||
--no-compression --no-lunr-index
|
|
||||||
|
|
||||||
ls -lh /tmp/records
|
|
||||||
|
|
||||||
# Serves two purposes:
|
|
||||||
# 1. Be confident that the Elasticsearch server start-up worked at all
|
|
||||||
# 2. Sometimes Elasticsearch will bind to the port but still not
|
|
||||||
# technically be ready. By using `curl --retry` we can know it's
|
|
||||||
# also genuinely ready to use.
|
|
||||||
- name: Ping Elasticsearch
|
|
||||||
run: curl --retry-connrefused --retry 5 -I http://localhost:9200/
|
|
||||||
|
|
||||||
- name: Index some
|
|
||||||
env:
|
|
||||||
ELASTICSEARCH_URL: 'http://localhost:9200'
|
|
||||||
run: |
|
|
||||||
./script/search/index-elasticsearch.js --verbose \
|
|
||||||
-l en \
|
|
||||||
-V dotcom -- /tmp/records
|
|
||||||
|
|
||||||
- name: Show created indexes and aliases
|
|
||||||
run: |
|
|
||||||
curl http://localhost:9200/_cat/indices?v
|
|
||||||
curl http://localhost:9200/_cat/aliases?v
|
|
||||||
@@ -101,8 +101,7 @@ jobs:
|
|||||||
mkdir /tmp/records
|
mkdir /tmp/records
|
||||||
npm run sync-search-indices -- \
|
npm run sync-search-indices -- \
|
||||||
--language ${{ matrix.language }} \
|
--language ${{ matrix.language }} \
|
||||||
--out-directory /tmp/records \
|
/tmp/records
|
||||||
--no-compression --no-lunr-index
|
|
||||||
|
|
||||||
ls -lh /tmp/records
|
ls -lh /tmp/records
|
||||||
|
|
||||||
@@ -118,7 +117,8 @@ jobs:
|
|||||||
VERSION: ${{ github.event.inputs.version }}
|
VERSION: ${{ github.event.inputs.version }}
|
||||||
run: |
|
run: |
|
||||||
./script/search/index-elasticsearch.js \
|
./script/search/index-elasticsearch.js \
|
||||||
--language ${{ matrix.language }} -- /tmp/records
|
--language ${{ matrix.language }} \
|
||||||
|
/tmp/records
|
||||||
|
|
||||||
- name: Check created indexes and aliases
|
- name: Check created indexes and aliases
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
10
.github/workflows/sync-search-pr.yml
vendored
10
.github/workflows/sync-search-pr.yml
vendored
@@ -8,8 +8,8 @@ name: Sync search - PR
|
|||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
- script/search/parse-page-sections-into-records.js
|
- 'script/search/**'
|
||||||
- script/search/popular-pages.js
|
- 'package*.json'
|
||||||
- lib/search/popular-pages.json
|
- lib/search/popular-pages.json
|
||||||
# Ultimately, for debugging this workflow itself
|
# Ultimately, for debugging this workflow itself
|
||||||
- .github/workflows/sync-search-pr.yml
|
- .github/workflows/sync-search-pr.yml
|
||||||
@@ -98,8 +98,7 @@ jobs:
|
|||||||
npm run sync-search-indices -- \
|
npm run sync-search-indices -- \
|
||||||
--language en \
|
--language en \
|
||||||
--version dotcom \
|
--version dotcom \
|
||||||
--out-directory /tmp/records \
|
/tmp/records
|
||||||
--no-compression --no-lunr-index
|
|
||||||
|
|
||||||
ls -lh /tmp/records
|
ls -lh /tmp/records
|
||||||
|
|
||||||
@@ -111,7 +110,8 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
./script/search/index-elasticsearch.js \
|
./script/search/index-elasticsearch.js \
|
||||||
--language en \
|
--language en \
|
||||||
--version dotcom -- /tmp/records
|
--version dotcom \
|
||||||
|
/tmp/records
|
||||||
|
|
||||||
- name: Check created indexes and aliases
|
- name: Check created indexes and aliases
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -45,12 +45,6 @@ The Actions workflow progress can be viewed (by GitHub employees) in the [Action
|
|||||||
|
|
||||||
You can manually run the workflow to generate the indexes after you push your changes to `main` to speed up the indexing when needed. It's recommended to do this for only the `free-pro-team@latest` version and the `en` language because running all languages and versions takes about 40 minutes. To run it manually, click "Run workflow" button in the [Actions tab](https://github.com/github/docs-internal/actions/workflows/sync-search-indices.yml). Enter the language and version you'd like to generate the indexes for as inputs to the workflow. By default, all languages and versions are generated.
|
You can manually run the workflow to generate the indexes after you push your changes to `main` to speed up the indexing when needed. It's recommended to do this for only the `free-pro-team@latest` version and the `en` language because running all languages and versions takes about 40 minutes. To run it manually, click "Run workflow" button in the [Actions tab](https://github.com/github/docs-internal/actions/workflows/sync-search-indices.yml). Enter the language and version you'd like to generate the indexes for as inputs to the workflow. By default, all languages and versions are generated.
|
||||||
|
|
||||||
## Generating search indexes for your local checkout
|
|
||||||
|
|
||||||
You can locally generate search indexes, but please do not check them into your local branch because they can get out-of-sync with the `main` branch quickly.
|
|
||||||
|
|
||||||
To generate the English version of the Dotcom search index locally, run `LANGUAGE=en VERSION=free-pro-team@latest npm run sync-search`. See [Build and sync](#build-and-sync) below for more details. To revert those files, run `git checkout lib/search/indexes`.
|
|
||||||
|
|
||||||
### Build and sync
|
### Build and sync
|
||||||
|
|
||||||
To build all the indices (this takes about an hour):
|
To build all the indices (this takes about an hour):
|
||||||
|
|||||||
@@ -1,24 +0,0 @@
|
|||||||
import { promisify } from 'util'
|
|
||||||
import zlib from 'zlib'
|
|
||||||
// Promise-returning wrappers around Node's callback-style Brotli helpers.
const brotliCompress = promisify(zlib.brotliCompress)
const brotliDecompress = promisify(zlib.brotliDecompress)

// Encoder settings used when compressing: text mode at quality 6.
const options = {
  params: {
    [zlib.constants.BROTLI_PARAM_MODE]: zlib.constants.BROTLI_MODE_TEXT,
    [zlib.constants.BROTLI_PARAM_QUALITY]: 6,
  },
}

/**
 * Brotli-compress `data` using the module's encoder settings.
 *
 * @param {Buffer|string} data - The payload to compress.
 * @returns {Promise<Buffer>} The compressed bytes.
 */
export async function compress(data) {
  return brotliCompress(data, options)
}

/**
 * Decompress Brotli-compressed `data`.
 *
 * @param {Buffer} data - Bytes previously produced by a Brotli encoder.
 * @returns {Promise<Buffer>} The decompressed bytes.
 */
export async function decompress(data) {
  // `options.params` is keyed on encoder constants (BROTLI_PARAM_*). The
  // decoder has its own BROTLI_DECODER_PARAM_* set, so handing it the encoder
  // keys would switch on unrelated decoder settings. The decoder needs no
  // parameters to read what `compress()` wrote.
  return brotliDecompress(data)
}

export default {
  compress,
  decompress,
}
|
|
||||||
@@ -1,401 +0,0 @@
|
|||||||
import { fileURLToPath } from 'url'
|
|
||||||
import path from 'path'
|
|
||||||
import lunr from 'lunr'
|
|
||||||
import fs from 'fs/promises'
|
|
||||||
import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js'
|
|
||||||
import tinyseg from 'lunr-languages/tinyseg.js'
|
|
||||||
import lunrJa from 'lunr-languages/lunr.ja.js'
|
|
||||||
import lunrEs from 'lunr-languages/lunr.es.js'
|
|
||||||
import lunrPt from 'lunr-languages/lunr.pt.js'
|
|
||||||
import { get } from 'lodash-es'
|
|
||||||
import statsd from '../statsd.js'
|
|
||||||
import { namePrefix } from './config.js'
|
|
||||||
import { decompress } from './compress.js'
|
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
|
||||||
|
|
||||||
// By default Lunr considers the `-` character to be a word boundary.
|
|
||||||
// This allows hyphens to be included in the query.
|
|
||||||
// If you change this, remember to make it match the indexing separator
|
|
||||||
// in script/search/lunr-search-index.js so the query is tokenized
|
|
||||||
// identically to the way it was indexed.
|
|
||||||
lunr.QueryLexer.termSeparator = /[\s]+/
|
|
||||||
lunrStemmerSupport(lunr)
|
|
||||||
tinyseg(lunr)
|
|
||||||
lunrJa(lunr)
|
|
||||||
lunrEs(lunr)
|
|
||||||
lunrPt(lunr)
|
|
||||||
|
|
||||||
const LUNR_DIR = './indexes'
|
|
||||||
const lunrIndexes = new Map()
|
|
||||||
const lunrRecords = new Map()
|
|
||||||
|
|
||||||
// Max size of the `.content` record included in the JSON payload that the
|
|
||||||
// middleware server will serve.
|
|
||||||
// The reason we're worrying about that here and not in the middleware
|
|
||||||
// is because what we're *ultimately* sending is HTML so we can't let
|
|
||||||
// the consumer of this module, slice it as a regular string because
|
|
||||||
// they might cut off an HTML tag in the middle.
|
|
||||||
// As of Oct 2021, with the way the CSS works inside components/Search.tsx
|
|
||||||
// roughly 450-650 characters is contained. Let's just make sure we're
|
|
||||||
// well within limit. So no visual difference, but smaller JSON payloads.
|
|
||||||
const MAX_CONTENT_LENGTH = 1000
|
|
||||||
|
|
||||||
/**
 * Base class for errors caused by the terms of a search query.
 * Callers can catch this to handle every query-related failure at once.
 */
export class QueryTermError extends Error {
  constructor(...args) {
    super(...args)
    // Without this, logs and stack traces would label it a plain "Error".
    this.name = 'QueryTermError'
  }
}

/**
 * Thrown when a query word uses a `prefix:` keyword that is not supported.
 */
export class QueryPrefixError extends QueryTermError {
  constructor(...args) {
    super(...args)
    this.name = 'QueryPrefixError'
  }
}
|
|
||||||
|
|
||||||
/**
 * Search the Lunr index for one version/language pair and return the
 * top-ranked records with their matching tokens highlighted.
 *
 * The index and its records are loaded on first use and then kept in the
 * module-level `lunrIndexes` / `lunrRecords` maps, keyed on the index name.
 *
 * @param {object} options
 * @param {string} options.version - Version part of the index name.
 * @param {string} options.language - Language part of the index name.
 * @param {string} options.query - What the user typed; it is trimmed before use.
 * @param {number} options.limit - Maximum number of records to return.
 * @returns {Promise<object[]>} Objects with `url`, `breadcrumbs`, `title`,
 *   `content`, `topics`, `score` and `popularity`, best match first.
 * @throws {QueryPrefixError} When a word starts with a `prefix:` keyword
 *   other than `topics:` or `title:`.
 */
export default async function loadLunrResults({ version, language, query, limit }) {
  const indexName = `${namePrefix}-${version}-${language}`
  if (!lunrIndexes.has(indexName) || !lunrRecords.has(indexName)) {
    // The two files are independent of each other, so read them concurrently.
    const [loadedIndex, loadedRecords] = await Promise.all([
      loadLunrIndex(indexName),
      loadLunrRecords(indexName),
    ])
    lunrIndexes.set(indexName, loadedIndex)
    lunrRecords.set(indexName, loadedRecords)
    statsd.increment('middleware.lunr_cold_index', 1, [`index:${indexName}`])
    statsd.gauge('memory_heap_used', process.memoryUsage().heapUsed, ['event:lunr-index'])
  }
  const index = lunrIndexes.get(indexName)
  const records = lunrRecords.get(indexName)
  const trimmedQuery = query.trim()
  const queryLength = trimmedQuery.length

  const validPrefixes = ['topics', 'title']
  for (const word of trimmedQuery.split(/\s+/g)) {
    // By splitting up the query into words, we can use ^ at the start
    // of the regex. That avoids "Polynomial regular expression used on
    // uncontrolled data" warning because the regex can be evaluated
    // from left to right quickly.
    for (const match of word.matchAll(/^(\w+):/g)) {
      if (!validPrefixes.includes(match[1])) {
        throw new QueryPrefixError(
          `'${match[1]}' is not a valid prefix keyword. Must be one of (${validPrefixes})`
        )
      }
    }
  }

  // A search results /combined/ score is:
  //
  //    normalizedScore + POPULARITY_FACTOR * record.popularity
  //
  // where the "normalizedScore" is the ratio of its Lunr score divided
  // by the highest score of all found in Lunr. That means, that the record
  // Lunr thinks matches the most becomes 1.0.
  //
  // It's the number we sort on. The `record.popularity` is always a
  // number between (and including) 0-1.
  // If the normalized score is, say, 0.5 and the popularity is 0.1, and
  // the POPULARITY_FACTOR is 10, the combined score is 0.5 + 10 * 0.1 = 1.5
  // If you make this too large, the Lunr score becomes insignificant and
  // any single match anywhere will always favor the popular documents.
  // The best way to adjust this number is to get a feeling for what
  // kinds of Lunr score numbers we're usually getting and adjust
  // accordingly.
  // Short queries are bound to be very ambiguous and the more ambiguous
  // the more relevant the popularity is.
  const POPULARITY_FACTOR = queryLength <= 2 ? 25 : queryLength <= 6 ? 10 : 5

  // This number determines how much more we favor the title search first.
  // It's a multiplier. We do 2 searches: one on title, one on all other fields.
  // Then, we compare all scores. But the scores in the results from the title
  // we multiply that with this number.
  // The effect is that we favor matches in the title more than we favor
  // matches that were not in the title.
  // If you search for 'foobar' and it appears in the title of one
  // not-so-popular record, but also appears in the content of a
  // very popular record, you want to give the title-matching one a
  // leg up.
  // Note that the Lunr scores from the content is usually much higher
  // than scores on the title. E.g. the word `codespaces` might appear
  // 10 times on a page that is actually about something else. If there's
  // a record whose title includes `codespaces` it might get a very low
  // Lunr score but since title matches are generally a "better", we
  // want to make sure this number accounts for that.
  const TITLE_FIRST = queryLength <= 2 ? 45 : queryLength <= 6 ? 25 : 10

  // Multiplication bonus given to matches that were made on the
  // search where ALL tokens are required.
  // E.g. you search for 'foo bar' and we have three records:
  //
  //    A) "This foo is very special"
  //    B) "With bar and foo you can't go wrong"
  //    C) "Only bar can save you"
  //
  // What will happen is that it only finds record (B) when it's
  // required to match both 'foo' *and* 'bar'. So you get these scores:
  //
  //    A) score = result.score + popularity
  //    B) score = MATCH_PHRASE * (result.score + popularity)
  //    C) score = result.score + popularity
  //
  // So it's very powerful multiplier. But that's fine because a
  // "phrase match" is a very accurate thing.
  const MATCH_PHRASE = 5

  // Imagine that we have 1,000 documents. 100 of them contain the word
  // 'foobar'. Of those 100, we want to display the top 10 "best".
  // But if we only do `lunrindex.search('foobar').slice(0, 10)` we
  // would slice prematurely. Instead, we do
  // `lunrindex.search('foobar').slice(0, 100)` first, sort those,
  // and in the final step, after any custom sorting, we `.slice(0, 10)`.
  // This number decides how many to extract from Lunr in the first place
  // that we're going to do our custom sorting on.
  // This number can be allowed to be pretty big because we're only ever
  // going to do the more time-consuming highlighting on the `limit`
  // records that we finally return.
  const PRE_LIMIT = 500

  const titleQuery = trimmedQuery

  // Shared by both title searches below; it only ever grows, so every
  // title score is normalized against the highest title score seen so far.
  let highestTitleScore = 0.0

  const andTitleResults = []

  // This will turn something like 'foo and bar' into:
  //   [
  //     { str: 'foo', metadata: { position: [Array], index: 0 } },
  //     { str: 'bar', metadata: { position: [Array], index: 1 } }
  //   ]
  // Note how the stopword gets omitted.
  // It's important to omit the stopwords because even if the record
  // actually contains the stopword, it won't match then.
  // E.g. you have a record called "Foo And Bar" and you search for
  // {foo AND and AND bar} it will actually not find anything.
  // But if you change it to {foo AND bar} it will match "Foo And Bar"
  // Same goes if any other stopwords were used like "Foo the Bar with for a".
  // That also needs to become an AND-search of {foo AND bar} ...only.
  const titleQueryTokenized = lunr.tokenizer(titleQuery).filter(lunr.stopWordFilter)

  if (titleQueryTokenized.length > 1) {
    andTitleResults.push(
      ...index
        .query((q) => {
          for (const { str } of titleQueryTokenized) {
            q.term(str, { fields: ['title'], presence: lunr.Query.presence.REQUIRED })
          }
        })
        .slice(0, PRE_LIMIT)
        .map((result) => {
          const { popularity } = records[result.ref]
          if (result.score > highestTitleScore) {
            highestTitleScore = result.score
          }
          const score = result.score / highestTitleScore
          return {
            result,
            _score: MATCH_PHRASE * TITLE_FIRST * (score + POPULARITY_FACTOR * (popularity || 0.0)),
          }
        })
    )
  }

  const titleResults = index
    .query((q) => {
      // The objective is to create an OR-query specifically for the 'title'
      // because *we* value matches on that much higher than any other
      // field in our records.
      // But we want to make sure that the last word is always treated
      // like a forward-tokenized token. I.e. you typed "google ku"
      // becomes a search for "google ku*".
      // Note that it's important to use the `lunr.tokenizer()` function when
      // using the `index.query()` function because, for starters, it will
      // normalize the input.
      // `index.search()` is the higher-level abstraction of basically
      // doing this:
      // (pseudo code)
      //
      //    Index.prototype.search = function(input) {
      //      lunr.tokenize(input).forEach(token => {
      //        Index.query(callback => {
      //          callback(token)
      //        })
      //      })
      //    }
      //
      // If we didn't use the tokenized form, we'd get different results
      // for searching for "SSH agent" and "ssh AgenT" for example.
      titleQueryTokenized.forEach(({ str }, i) => {
        const isLastToken = i === titleQueryTokenized.length - 1
        const isShort = str.length <= 3
        q.term(str, {
          fields: ['title'],
          wildcard:
            isLastToken && isShort ? lunr.Query.wildcard.TRAILING : lunr.Query.wildcard.NONE,
        })
      })
    })
    .slice(0, PRE_LIMIT)
    .map((result) => {
      const { popularity } = records[result.ref]
      if (result.score > highestTitleScore) {
        highestTitleScore = result.score
      }
      const score = result.score / highestTitleScore
      return {
        result,
        _score: TITLE_FIRST * (score + POPULARITY_FACTOR * (popularity || 0.0)),
      }
    })

  let allQuery = trimmedQuery

  // Unfortunately, Lunr currently doesn't support phrase matching
  // so you always end up with 0 results if you search for `"foo bar"`.
  // In this case it's better to do a search for `foo` and `bar`.
  if (
    allQuery.startsWith('"') &&
    allQuery.endsWith('"') &&
    (allQuery.match(/"/g) || []).length === 2
  ) {
    allQuery = allQuery.slice(1, -1)
  }

  let highestAllScore = 0.0
  const allResults = index
    .search(allQuery)
    .slice(0, PRE_LIMIT)
    .map((result) => {
      const { popularity } = records[result.ref]
      if (result.score > highestAllScore) {
        highestAllScore = result.score
      }
      const score = result.score / highestAllScore
      return {
        result,
        score,
        _score: score + POPULARITY_FACTOR * (popularity || 0.0),
      }
    })

  const _unique = new Set()
  const combinedMatchData = {}
  const results = []
  for (const matches of [andTitleResults, titleResults, allResults]) {
    for (const match of matches) {
      const { result } = match
      // We need to loop over all results (both from title searches and
      // from all-field searches) but we can only keep one.
      // But before we do that filtering (i.e. omitting previous kept)
      // we need to merge all the matchData from each result.
      // That's because the `result.matchData` from the title search
      // will have Lunr match positions for 'title' but the `result.matchData`
      // from the all-field search, will have positions for other things
      // such as 'content' and 'breadcrumbs'.
      // NOTE(review): `Object.assign` copies top-level properties only, so a
      // later result's `metadata` replaces an earlier one rather than being
      // merged into it — confirm that is the intended "merge".

      combinedMatchData[result.ref] = Object.assign(
        combinedMatchData[result.ref] || {},
        result.matchData
      )

      if (_unique.has(result.ref)) continue
      _unique.add(result.ref)

      results.push(match)
    }
  }

  // Highest score first
  results.sort((a, b) => b._score - a._score)

  // We might have found much more than `limit` number of matches and we've
  // taken them all out for our custom sorting. Now, once that's done,
  // of the ones we're going to return we apply the highlighting.
  // The reasoning is that the highlighting work isn't free and it'd
  // be a waste to do it on results we're not going to return anyway.
  return results.slice(0, limit).map(({ result }) => {
    const record = records[result.ref]
    const matchData = combinedMatchData[result.ref]
    return {
      url: result.ref,
      breadcrumbs: field(matchData, record, 'breadcrumbs'),
      title: field(matchData, record, 'title'),
      content: smartSlice(field(matchData, record, 'content'), MAX_CONTENT_LENGTH),
      // don't highlight the topics array
      topics: record.topics,
      score: result.score,
      popularity: record.popularity || 0.0,
    }
  })
}
|
|
||||||
|
|
||||||
/**
 * Read, decompress and deserialize the Lunr index stored for `indexName`.
 *
 * @param {string} indexName - Base name of the `<indexName>.json.br` file
 *   inside the index directory.
 * @returns {Promise<object>} Whatever `lunr.Index.load` builds from the JSON.
 */
async function loadLunrIndex(indexName) {
  const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}.json.br`)
  // No encoding is given on purpose: the compressed file has to reach
  // `decompress` as raw bytes.
  const compressed = await fs.readFile(filePath)
  const serialized = await decompress(compressed)
  return lunr.Index.load(JSON.parse(serialized))
}
|
|
||||||
|
|
||||||
/**
 * Read, decompress and parse the records file stored for `indexName`.
 *
 * @param {string} indexName - Base name of the `<indexName>-records.json.br`
 *   file inside the index directory.
 * @returns {Promise<object>} The parsed JSON content of the records file.
 */
async function loadLunrRecords(indexName) {
  const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}-records.json.br`)
  // No encoding is given on purpose: the compressed file has to reach
  // `decompress` as raw bytes.
  const compressed = await fs.readFile(filePath)
  const serialized = await decompress(compressed)
  return JSON.parse(serialized)
}
|
|
||||||
|
|
||||||
/**
 * Highlight the matched tokens inside one field of a record.
 *
 * @param {object} matchData - Match data whose `metadata` maps each matched
 *   term to `{ [fieldName]: { position: [[start, length], ...] } }`.
 * @param {object} record - The record the text is read from.
 * @param {string} name - Which field of the record to highlight.
 * @returns {string|undefined} The field's text, HTML-escaped and with every
 *   match wrapped in `<mark>`. When the field is empty or has no matches the
 *   stored value is returned untouched.
 */
function field(matchData, record, name) {
  const text = record[name]
  if (!text) return text

  // First, get a list of all the positions of the matching tokens.
  // Each entry ends up as [endOfPreviousMatch, start, end].
  const positions = Object.values(matchData.metadata)
    .map((fields) => get(fields, [name, 'position']))
    .filter(Boolean)
    .flat()
    .sort((a, b) => a[0] - b[0])
    .map(([start, length]) => [start, start + length])
    .map(([start, end], i, a) => [i && a[i - 1][1], start, end])

  // If this field has no token matches, no highlighting
  // NOTE(review): this path returns the text without HTML escaping, unlike
  // the highlighted path below — confirm that consumers expect that.
  if (!positions.length) return text

  // Highlight the text
  const highlighted = positions
    .map(([prev, start, end], i) => [
      text.slice(prev, start),
      mark(text.slice(start, end)),
      // Only the last match is followed by the remainder of the text.
      i === positions.length - 1 && text.slice(end),
    ])
    .flat()
    .filter(Boolean)
    .join('')

  // We can't HTML escape the content until AFTER all the matchData positions
  // have been processed, otherwise the positions would shift.
  // The only HTML that is OK to keep is <mark> and </mark>, so everything is
  // escaped first and then just those two tags are turned back into markup.
  return highlighted
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/&lt;mark&gt;/g, '<mark>')
    .replace(/&lt;\/mark&gt;/g, '</mark>')
}

/**
 * Wrap `text` in a `<mark>` element.
 *
 * @param {string} text - The matched substring.
 * @returns {string} The text surrounded by `<mark>` and `</mark>`.
 */
function mark(text) {
  return `<mark>${text}</mark>`
}
|
|
||||||
|
|
||||||
/**
 * Given a long string, "slice" it in a safe way so as to not chop any
 * HTML tags in half.
 *
 * Without any `needleTag` in the text, this is a plain `slice(0, length)`.
 * Otherwise whole lines are kept, starting at the first line containing the
 * tag, until the kept lines add up to more than `length` characters. That
 * result can therefore be longer than `length`.
 *
 * @param {string|undefined|null} text - The (possibly highlighted) text.
 * @param {number} length - The target number of characters.
 * @param {string} [needleTag='<mark>'] - The tag that marks where the
 *   interesting part of the text begins.
 * @returns {string|undefined|null} The shortened text. An empty or missing
 *   `text` is returned as it was given.
 */
function smartSlice(text, length, needleTag = '<mark>') {
  // A record can lack the field altogether, in which case there is
  // nothing to slice.
  if (!text) return text

  // If the needleTag isn't present at all, we can dare to use a
  // very basic crude string slice because the text won't have any
  // other HTML tags we might cut in half.
  if (!text.includes(needleTag)) {
    return text.slice(0, length)
  }

  // The algorithm is simple, split the text by lines. Loop over them,
  // and only include them if we've encountered the first needleTag
  // and bail early if we've buffered enough in the array of lines.
  const lines = []
  let sum = 0
  let started = false
  for (const line of text.split('\n')) {
    if (line.includes(needleTag)) started = true
    if (started) {
      lines.push(line)
      sum += line.length
      if (sum > length) {
        break
      }
    }
  }
  return lines.join('\n')
}
|
|
||||||
313
package-lock.json
generated
313
package-lock.json
generated
@@ -51,8 +51,6 @@
|
|||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"lodash-es": "^4.17.21",
|
"lodash-es": "^4.17.21",
|
||||||
"lowdb": "5.0.5",
|
"lowdb": "5.0.5",
|
||||||
"lunr": "^2.3.9",
|
|
||||||
"lunr-languages": "^1.9.0",
|
|
||||||
"mdast-util-from-markdown": "^1.2.0",
|
"mdast-util-from-markdown": "^1.2.0",
|
||||||
"mdast-util-to-string": "^3.1.0",
|
"mdast-util-to-string": "^3.1.0",
|
||||||
"morgan": "^1.10.0",
|
"morgan": "^1.10.0",
|
||||||
@@ -5651,9 +5649,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/babel-loader": {
|
"node_modules/babel-loader": {
|
||||||
"version": "9.1.0",
|
"version": "9.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.0.1.tgz",
|
||||||
"integrity": "sha512-Antt61KJPinUMwHwIIz9T5zfMgevnfZkEVWYDWlG888fgdvRRGD0JTuf/fFozQnfT+uq64sk1bmdHDy/mOEWnA==",
|
"integrity": "sha512-szYjslOXFlj/po5KfrVmiuBAcI6GVHFuAgC96Qd6mMPHdwl4lmAJkYtvjQ1RxxPjgdkKjd3LQgXDE4jxEutNuw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"find-cache-dir": "^3.3.2",
|
"find-cache-dir": "^3.3.2",
|
||||||
@@ -7477,7 +7475,7 @@
|
|||||||
"node_modules/css-url-parser": {
|
"node_modules/css-url-parser": {
|
||||||
"version": "1.1.3",
|
"version": "1.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
|
||||||
"integrity": "sha512-KO4HrqK3lAlrnobbBEHib/lFRw7kGOlQTLYhwTwWzDEGilGTYIYOpI22d+6euyZiqfZpV96pii87ZufifbxpqA==",
|
"integrity": "sha1-qkAeXT3RwLkwTAlgKLuZIAH/XJc=",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"node_modules/css-what": {
|
"node_modules/css-what": {
|
||||||
@@ -10642,9 +10640,9 @@
|
|||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
"node_modules/image-size": {
|
"node_modules/image-size": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.2.tgz",
|
||||||
"integrity": "sha512-VAwkvNSNGClRw9mDHhc5Efax8PLlsOGcUTh0T/LIriC8vPA3U5PdqXWqkz406MoYHMKW8Uf9gWr05T/rYB44kQ==",
|
"integrity": "sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"queue": "6.0.2"
|
"queue": "6.0.2"
|
||||||
@@ -10653,7 +10651,7 @@
|
|||||||
"image-size": "bin/image-size.js"
|
"image-size": "bin/image-size.js"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=12.0.0"
|
"node": ">=14.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/immutable": {
|
"node_modules/immutable": {
|
||||||
@@ -14290,14 +14288,6 @@
|
|||||||
"node": ">=10"
|
"node": ">=10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/lunr": {
|
|
||||||
"version": "2.3.9",
|
|
||||||
"license": "MIT"
|
|
||||||
},
|
|
||||||
"node_modules/lunr-languages": {
|
|
||||||
"version": "1.9.0",
|
|
||||||
"license": "MPL-1.1"
|
|
||||||
},
|
|
||||||
"node_modules/make-dir": {
|
"node_modules/make-dir": {
|
||||||
"version": "3.1.0",
|
"version": "3.1.0",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
@@ -15983,9 +15973,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/p-queue": {
|
"node_modules/p-queue": {
|
||||||
"version": "7.3.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.2.0.tgz",
|
||||||
"integrity": "sha512-5fP+yVQ0qp0rEfZoDTlP2c3RYBgxvRsw30qO+VtPPc95lyvSG+x6USSh1TuLB4n96IO6I8/oXQGsTgtna4q2nQ==",
|
"integrity": "sha512-Kvv7p13M46lTYLQ/PsZdaj/1Vj6u/8oiIJgyQyx4oVkOfHdd7M2EZvXigDvcsSzRwanCzQirV5bJPQFoSQt5MA==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"eventemitter3": "^4.0.7",
|
"eventemitter3": "^4.0.7",
|
||||||
@@ -19387,7 +19377,7 @@
|
|||||||
"node_modules/truncate-utf8-bytes": {
|
"node_modules/truncate-utf8-bytes": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
|
||||||
"integrity": "sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==",
|
"integrity": "sha1-QFkjkJWS1W94pYGENLC3hInKXys=",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"utf8-byte-length": "^1.0.1"
|
"utf8-byte-length": "^1.0.1"
|
||||||
@@ -19912,7 +19902,7 @@
|
|||||||
"node_modules/utf8-byte-length": {
|
"node_modules/utf8-byte-length": {
|
||||||
"version": "1.0.4",
|
"version": "1.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
|
||||||
"integrity": "sha512-4+wkEYLBbWxqTahEsWrhxepcoVOJ+1z5PGIjPZxRkytcdSUaNjIjBM7Xn8E+pdSuV7SzvWovBFA54FO0JSoqhA==",
|
"integrity": "sha1-9F8VDExm7uloGGUFq5P8u4rWv2E=",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"node_modules/util-deprecate": {
|
"node_modules/util-deprecate": {
|
||||||
@@ -20321,17 +20311,16 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/website-scraper": {
|
"node_modules/website-scraper": {
|
||||||
"version": "5.0.0",
|
"version": "5.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.3.1.tgz",
|
||||||
"integrity": "sha512-wZP7fSQR86UZSCXfKzd5OlgBb6AdxXN6gVN07Hy2wYxp2+GeqQAIw+sbqXNlPQnpJLwmRZDWp2u6KeuaFOhotw==",
|
"integrity": "sha512-gogqPXD2gVsxoyd2yRiympw3rA5GuEpD1CaDEJ/J8zzanx7hkbTtneoO1SGs436PpLbWVcUge+6APGLhzsuZPA==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cheerio": "1.0.0-rc.10",
|
"cheerio": "1.0.0-rc.12",
|
||||||
"css-url-parser": "^1.0.0",
|
"css-url-parser": "^1.0.0",
|
||||||
"debug": "^4.3.1",
|
"debug": "^4.3.1",
|
||||||
"fs-extra": "^10.0.0",
|
"fs-extra": "^10.0.0",
|
||||||
"got": "^12.0.0",
|
"got": "^12.0.0",
|
||||||
"lodash": "^4.17.21",
|
|
||||||
"normalize-url": "^7.0.2",
|
"normalize-url": "^7.0.2",
|
||||||
"p-queue": "^7.1.0",
|
"p-queue": "^7.1.0",
|
||||||
"sanitize-filename": "^1.6.3",
|
"sanitize-filename": "^1.6.3",
|
||||||
@@ -20341,121 +20330,6 @@
|
|||||||
"node": ">=14.14"
|
"node": ">=14.14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/website-scraper/node_modules/cheerio": {
|
|
||||||
"version": "1.0.0-rc.10",
|
|
||||||
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.10.tgz",
|
|
||||||
"integrity": "sha512-g0J0q/O6mW8z5zxQ3A8E8J1hUgp4SMOvEoW/x84OwyHKe/Zccz83PVT4y5Crcr530FV6NgmKI1qvGTKVl9XXVw==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"cheerio-select": "^1.5.0",
|
|
||||||
"dom-serializer": "^1.3.2",
|
|
||||||
"domhandler": "^4.2.0",
|
|
||||||
"htmlparser2": "^6.1.0",
|
|
||||||
"parse5": "^6.0.1",
|
|
||||||
"parse5-htmlparser2-tree-adapter": "^6.0.1",
|
|
||||||
"tslib": "^2.2.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 6"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/cheerio-select": {
|
|
||||||
"version": "1.6.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz",
|
|
||||||
"integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"css-select": "^4.3.0",
|
|
||||||
"css-what": "^6.0.1",
|
|
||||||
"domelementtype": "^2.2.0",
|
|
||||||
"domhandler": "^4.3.1",
|
|
||||||
"domutils": "^2.8.0"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/fb55"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/css-select": {
|
|
||||||
"version": "4.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz",
|
|
||||||
"integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"boolbase": "^1.0.0",
|
|
||||||
"css-what": "^6.0.1",
|
|
||||||
"domhandler": "^4.3.1",
|
|
||||||
"domutils": "^2.8.0",
|
|
||||||
"nth-check": "^2.0.1"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/sponsors/fb55"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/dom-serializer": {
|
|
||||||
"version": "1.4.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
|
|
||||||
"integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"domelementtype": "^2.0.1",
|
|
||||||
"domhandler": "^4.2.0",
|
|
||||||
"entities": "^2.0.0"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/domhandler": {
|
|
||||||
"version": "4.3.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz",
|
|
||||||
"integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"domelementtype": "^2.2.0"
|
|
||||||
},
|
|
||||||
"engines": {
|
|
||||||
"node": ">= 4"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/fb55/domhandler?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/domutils": {
|
|
||||||
"version": "2.8.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
|
|
||||||
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"dom-serializer": "^1.0.1",
|
|
||||||
"domelementtype": "^2.2.0",
|
|
||||||
"domhandler": "^4.2.0"
|
|
||||||
},
|
|
||||||
"funding": {
|
|
||||||
"url": "https://github.com/fb55/domutils?sponsor=1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/htmlparser2": {
|
|
||||||
"version": "6.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-6.1.0.tgz",
|
|
||||||
"integrity": "sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==",
|
|
||||||
"funding": [
|
|
||||||
"https://github.com/fb55/htmlparser2?sponsor=1",
|
|
||||||
{
|
|
||||||
"type": "github",
|
|
||||||
"url": "https://github.com/sponsors/fb55"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"domelementtype": "^2.0.1",
|
|
||||||
"domhandler": "^4.0.0",
|
|
||||||
"domutils": "^2.5.2",
|
|
||||||
"entities": "^2.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/normalize-url": {
|
"node_modules/website-scraper/node_modules/normalize-url": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
|
||||||
@@ -20468,21 +20342,6 @@
|
|||||||
"url": "https://github.com/sponsors/sindresorhus"
|
"url": "https://github.com/sponsors/sindresorhus"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/website-scraper/node_modules/parse5": {
|
|
||||||
"version": "6.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
|
|
||||||
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"node_modules/website-scraper/node_modules/parse5-htmlparser2-tree-adapter": {
|
|
||||||
"version": "6.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz",
|
|
||||||
"integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==",
|
|
||||||
"optional": true,
|
|
||||||
"dependencies": {
|
|
||||||
"parse5": "^6.0.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/which": {
|
"node_modules/which": {
|
||||||
"version": "2.0.2",
|
"version": "2.0.2",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
@@ -24780,9 +24639,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"babel-loader": {
|
"babel-loader": {
|
||||||
"version": "9.1.0",
|
"version": "9.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.0.1.tgz",
|
||||||
"integrity": "sha512-Antt61KJPinUMwHwIIz9T5zfMgevnfZkEVWYDWlG888fgdvRRGD0JTuf/fFozQnfT+uq64sk1bmdHDy/mOEWnA==",
|
"integrity": "sha512-szYjslOXFlj/po5KfrVmiuBAcI6GVHFuAgC96Qd6mMPHdwl4lmAJkYtvjQ1RxxPjgdkKjd3LQgXDE4jxEutNuw==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"find-cache-dir": "^3.3.2",
|
"find-cache-dir": "^3.3.2",
|
||||||
@@ -26103,7 +25962,7 @@
|
|||||||
"css-url-parser": {
|
"css-url-parser": {
|
||||||
"version": "1.1.3",
|
"version": "1.1.3",
|
||||||
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
|
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
|
||||||
"integrity": "sha512-KO4HrqK3lAlrnobbBEHib/lFRw7kGOlQTLYhwTwWzDEGilGTYIYOpI22d+6euyZiqfZpV96pii87ZufifbxpqA==",
|
"integrity": "sha1-qkAeXT3RwLkwTAlgKLuZIAH/XJc=",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"css-what": {
|
"css-what": {
|
||||||
@@ -28238,9 +28097,9 @@
|
|||||||
"dev": true
|
"dev": true
|
||||||
},
|
},
|
||||||
"image-size": {
|
"image-size": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.2.tgz",
|
||||||
"integrity": "sha512-VAwkvNSNGClRw9mDHhc5Efax8PLlsOGcUTh0T/LIriC8vPA3U5PdqXWqkz406MoYHMKW8Uf9gWr05T/rYB44kQ==",
|
"integrity": "sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"queue": "6.0.2"
|
"queue": "6.0.2"
|
||||||
@@ -30902,12 +30761,6 @@
|
|||||||
"yallist": "^4.0.0"
|
"yallist": "^4.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"lunr": {
|
|
||||||
"version": "2.3.9"
|
|
||||||
},
|
|
||||||
"lunr-languages": {
|
|
||||||
"version": "1.9.0"
|
|
||||||
},
|
|
||||||
"make-dir": {
|
"make-dir": {
|
||||||
"version": "3.1.0",
|
"version": "3.1.0",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
@@ -31925,9 +31778,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"p-queue": {
|
"p-queue": {
|
||||||
"version": "7.3.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.2.0.tgz",
|
||||||
"integrity": "sha512-5fP+yVQ0qp0rEfZoDTlP2c3RYBgxvRsw30qO+VtPPc95lyvSG+x6USSh1TuLB4n96IO6I8/oXQGsTgtna4q2nQ==",
|
"integrity": "sha512-Kvv7p13M46lTYLQ/PsZdaj/1Vj6u/8oiIJgyQyx4oVkOfHdd7M2EZvXigDvcsSzRwanCzQirV5bJPQFoSQt5MA==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"eventemitter3": "^4.0.7",
|
"eventemitter3": "^4.0.7",
|
||||||
@@ -34200,7 +34053,7 @@
|
|||||||
"truncate-utf8-bytes": {
|
"truncate-utf8-bytes": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
|
||||||
"integrity": "sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==",
|
"integrity": "sha1-QFkjkJWS1W94pYGENLC3hInKXys=",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"utf8-byte-length": "^1.0.1"
|
"utf8-byte-length": "^1.0.1"
|
||||||
@@ -34549,7 +34402,7 @@
|
|||||||
"utf8-byte-length": {
|
"utf8-byte-length": {
|
||||||
"version": "1.0.4",
|
"version": "1.0.4",
|
||||||
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
|
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
|
||||||
"integrity": "sha512-4+wkEYLBbWxqTahEsWrhxepcoVOJ+1z5PGIjPZxRkytcdSUaNjIjBM7Xn8E+pdSuV7SzvWovBFA54FO0JSoqhA==",
|
"integrity": "sha1-9F8VDExm7uloGGUFq5P8u4rWv2E=",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"util-deprecate": {
|
"util-deprecate": {
|
||||||
@@ -34835,127 +34688,27 @@
|
|||||||
"peer": true
|
"peer": true
|
||||||
},
|
},
|
||||||
"website-scraper": {
|
"website-scraper": {
|
||||||
"version": "5.0.0",
|
"version": "5.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.3.1.tgz",
|
||||||
"integrity": "sha512-wZP7fSQR86UZSCXfKzd5OlgBb6AdxXN6gVN07Hy2wYxp2+GeqQAIw+sbqXNlPQnpJLwmRZDWp2u6KeuaFOhotw==",
|
"integrity": "sha512-gogqPXD2gVsxoyd2yRiympw3rA5GuEpD1CaDEJ/J8zzanx7hkbTtneoO1SGs436PpLbWVcUge+6APGLhzsuZPA==",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"cheerio": "1.0.0-rc.10",
|
"cheerio": "1.0.0-rc.12",
|
||||||
"css-url-parser": "^1.0.0",
|
"css-url-parser": "^1.0.0",
|
||||||
"debug": "^4.3.1",
|
"debug": "^4.3.1",
|
||||||
"fs-extra": "^10.0.0",
|
"fs-extra": "^10.0.0",
|
||||||
"got": "^12.0.0",
|
"got": "^12.0.0",
|
||||||
"lodash": "^4.17.21",
|
|
||||||
"normalize-url": "^7.0.2",
|
"normalize-url": "^7.0.2",
|
||||||
"p-queue": "^7.1.0",
|
"p-queue": "^7.1.0",
|
||||||
"sanitize-filename": "^1.6.3",
|
"sanitize-filename": "^1.6.3",
|
||||||
"srcset": "^5.0.0"
|
"srcset": "^5.0.0"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cheerio": {
|
|
||||||
"version": "1.0.0-rc.10",
|
|
||||||
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.10.tgz",
|
|
||||||
"integrity": "sha512-g0J0q/O6mW8z5zxQ3A8E8J1hUgp4SMOvEoW/x84OwyHKe/Zccz83PVT4y5Crcr530FV6NgmKI1qvGTKVl9XXVw==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"cheerio-select": "^1.5.0",
|
|
||||||
"dom-serializer": "^1.3.2",
|
|
||||||
"domhandler": "^4.2.0",
|
|
||||||
"htmlparser2": "^6.1.0",
|
|
||||||
"parse5": "^6.0.1",
|
|
||||||
"parse5-htmlparser2-tree-adapter": "^6.0.1",
|
|
||||||
"tslib": "^2.2.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"cheerio-select": {
|
|
||||||
"version": "1.6.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz",
|
|
||||||
"integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"css-select": "^4.3.0",
|
|
||||||
"css-what": "^6.0.1",
|
|
||||||
"domelementtype": "^2.2.0",
|
|
||||||
"domhandler": "^4.3.1",
|
|
||||||
"domutils": "^2.8.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"css-select": {
|
|
||||||
"version": "4.3.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz",
|
|
||||||
"integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"boolbase": "^1.0.0",
|
|
||||||
"css-what": "^6.0.1",
|
|
||||||
"domhandler": "^4.3.1",
|
|
||||||
"domutils": "^2.8.0",
|
|
||||||
"nth-check": "^2.0.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"dom-serializer": {
|
|
||||||
"version": "1.4.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
|
|
||||||
"integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"domelementtype": "^2.0.1",
|
|
||||||
"domhandler": "^4.2.0",
|
|
||||||
"entities": "^2.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"domhandler": {
|
|
||||||
"version": "4.3.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz",
|
|
||||||
"integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"domelementtype": "^2.2.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"domutils": {
|
|
||||||
"version": "2.8.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
|
|
||||||
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"dom-serializer": "^1.0.1",
|
|
||||||
"domelementtype": "^2.2.0",
|
|
||||||
"domhandler": "^4.2.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"htmlparser2": {
|
|
||||||
"version": "6.1.0",
|
|
||||||
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-6.1.0.tgz",
|
|
||||||
"integrity": "sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"domelementtype": "^2.0.1",
|
|
||||||
"domhandler": "^4.0.0",
|
|
||||||
"domutils": "^2.5.2",
|
|
||||||
"entities": "^2.0.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"normalize-url": {
|
"normalize-url": {
|
||||||
"version": "7.2.0",
|
"version": "7.2.0",
|
||||||
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
|
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
|
||||||
"integrity": "sha512-uhXOdZry0L6M2UIo9BTt7FdpBDiAGN/7oItedQwPKh8jh31ZlvC8U9Xl/EJ3aijDHaywXTW3QbZ6LuCocur1YA==",
|
"integrity": "sha512-uhXOdZry0L6M2UIo9BTt7FdpBDiAGN/7oItedQwPKh8jh31ZlvC8U9Xl/EJ3aijDHaywXTW3QbZ6LuCocur1YA==",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
|
||||||
"parse5": {
|
|
||||||
"version": "6.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
|
|
||||||
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"parse5-htmlparser2-tree-adapter": {
|
|
||||||
"version": "6.0.1",
|
|
||||||
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz",
|
|
||||||
"integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==",
|
|
||||||
"optional": true,
|
|
||||||
"requires": {
|
|
||||||
"parse5": "^6.0.1"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -53,8 +53,6 @@
|
|||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"lodash-es": "^4.17.21",
|
"lodash-es": "^4.17.21",
|
||||||
"lowdb": "5.0.5",
|
"lowdb": "5.0.5",
|
||||||
"lunr": "^2.3.9",
|
|
||||||
"lunr-languages": "^1.9.0",
|
|
||||||
"mdast-util-from-markdown": "^1.2.0",
|
"mdast-util-from-markdown": "^1.2.0",
|
||||||
"mdast-util-to-string": "^3.1.0",
|
"mdast-util-to-string": "^3.1.0",
|
||||||
"morgan": "^1.10.0",
|
"morgan": "^1.10.0",
|
||||||
|
|||||||
@@ -748,20 +748,6 @@ Creates Elasticsearch index, populates from records, moves the index alias, dele
|
|||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
### [`search/lunr-get-index-names.js`](search/lunr-get-index-names.js)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
### [`search/lunr-search-index.js`](search/lunr-search-index.js)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
### [`search/parse-page-sections-into-records.js`](search/parse-page-sections-into-records.js)
|
### [`search/parse-page-sections-into-records.js`](search/parse-page-sections-into-records.js)
|
||||||
|
|
||||||
|
|
||||||
@@ -779,23 +765,6 @@ Creates Elasticsearch index, populates from records, moves the index alias, dele
|
|||||||
### [`search/search-index-records.js`](search/search-index-records.js)
|
### [`search/search-index-records.js`](search/search-index-records.js)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
### [`search/search-qa-data.json`](search/search-qa-data.json)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
||||||
### [`search/search-qa-test.js`](search/search-qa-test.js)
|
|
||||||
|
|
||||||
This script is a quality assurance test for the Lunr search configuration. This test runs example queries and expects a specific page to land in the top 3 results.
|
|
||||||
|
|
||||||
The data source used by this script is a JSON file `script/search/search-qa-data.json`, which is populated from spreadsheet data here: https://docs.google.com/spreadsheets/d/1Dt5JRVcmyAGWKBwGjwmXxi7Ww_vdfYLfZ-EFpu2S2CQ/edit?usp=sharing
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ import dotenv from 'dotenv'
|
|||||||
import { retryOnErrorTest } from '../helpers/retry-on-error-test.js'
|
import { retryOnErrorTest } from '../helpers/retry-on-error-test.js'
|
||||||
import { languageKeys } from '../../lib/languages.js'
|
import { languageKeys } from '../../lib/languages.js'
|
||||||
import { allVersions } from '../../lib/all-versions.js'
|
import { allVersions } from '../../lib/all-versions.js'
|
||||||
import { decompress } from '../../lib/search/compress.js'
|
|
||||||
import statsd from '../../lib/statsd.js'
|
import statsd from '../../lib/statsd.js'
|
||||||
|
|
||||||
// Now you can optionally have set the ELASTICSEARCH_URL in your .env file.
|
// Now you can optionally have set the ELASTICSEARCH_URL in your .env file.
|
||||||
@@ -237,8 +236,8 @@ async function indexVersion(
|
|||||||
verbose = false
|
verbose = false
|
||||||
) {
|
) {
|
||||||
// Note, it's a bit "weird" that numbered releases versions are
|
// Note, it's a bit "weird" that numbered releases versions are
|
||||||
// called the number but that's how the lib/search/indexes
|
// called the number but that's the convention the previous
|
||||||
// files were.
|
// search backend used
|
||||||
const indexVersion = shortNames[version].hasNumberedReleases
|
const indexVersion = shortNames[version].hasNumberedReleases
|
||||||
? shortNames[version].currentRelease
|
? shortNames[version].currentRelease
|
||||||
: shortNames[version].miscBaseName
|
: shortNames[version].miscBaseName
|
||||||
@@ -414,21 +413,9 @@ function escapeHTML(content) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function loadRecords(indexName, sourceDirectory) {
|
async function loadRecords(indexName, sourceDirectory) {
|
||||||
// First try looking for the `$indexName-records.json.br` file.
|
|
||||||
// If that doens't work, look for the `$indexName-records.json` one.
|
|
||||||
try {
|
|
||||||
const filePath = path.join(sourceDirectory, `${indexName}-records.json.br`)
|
|
||||||
// Do not set to 'utf8' on file reads
|
|
||||||
const payload = await fs.readFile(filePath).then(decompress)
|
|
||||||
return JSON.parse(payload)
|
|
||||||
} catch (error) {
|
|
||||||
if (error.code === 'ENOENT') {
|
|
||||||
const filePath = path.join(sourceDirectory, `${indexName}-records.json`)
|
const filePath = path.join(sourceDirectory, `${indexName}-records.json`)
|
||||||
const payload = await fs.readFile(filePath)
|
const payload = await fs.readFile(filePath)
|
||||||
return JSON.parse(payload)
|
return JSON.parse(payload)
|
||||||
}
|
|
||||||
throw error
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function getSnowballLanguage(language) {
|
function getSnowballLanguage(language) {
|
||||||
|
|||||||
@@ -1,12 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
import { fileURLToPath } from 'url'
|
|
||||||
import path from 'path'
|
|
||||||
import fs from 'fs/promises'
|
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
|
||||||
|
|
||||||
async function getIndexNames() {
|
|
||||||
const indexList = await fs.readdir(path.join(__dirname, '../../lib/search/indexes'))
|
|
||||||
return indexList.sort().map((index) => index.replace('.json.br', ''))
|
|
||||||
}
|
|
||||||
|
|
||||||
export default await getIndexNames()
|
|
||||||
@@ -1,114 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
import { fileURLToPath } from 'url'
|
|
||||||
import path from 'path'
|
|
||||||
import lunr from 'lunr'
|
|
||||||
import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js'
|
|
||||||
import tinyseg from 'lunr-languages/tinyseg.js'
|
|
||||||
import lunrJa from 'lunr-languages/lunr.ja.js'
|
|
||||||
import lunrEs from 'lunr-languages/lunr.es.js'
|
|
||||||
import lunrPt from 'lunr-languages/lunr.pt.js'
|
|
||||||
import fs from 'fs/promises'
|
|
||||||
import validateRecords from './validate-records.js'
|
|
||||||
import { compress } from '../../lib/search/compress.js'
|
|
||||||
|
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
|
||||||
lunrStemmerSupport(lunr)
|
|
||||||
tinyseg(lunr)
|
|
||||||
lunrJa(lunr)
|
|
||||||
lunrEs(lunr)
|
|
||||||
lunrPt(lunr)
|
|
||||||
|
|
||||||
export default class LunrIndex {
|
|
||||||
constructor(name, records) {
|
|
||||||
this.name = name
|
|
||||||
|
|
||||||
// Add custom rankings
|
|
||||||
this.records = records.map((record) => {
|
|
||||||
return record
|
|
||||||
})
|
|
||||||
|
|
||||||
this.validate()
|
|
||||||
|
|
||||||
return this
|
|
||||||
}
|
|
||||||
|
|
||||||
validate() {
|
|
||||||
return validateRecords(this.name, this.records)
|
|
||||||
}
|
|
||||||
|
|
||||||
build() {
|
|
||||||
const language = this.name.split('-').pop()
|
|
||||||
const records = this.records
|
|
||||||
|
|
||||||
this.index = lunr(function constructIndex() {
|
|
||||||
// No arrow here!
|
|
||||||
if (['ja', 'es', 'pt'].includes(language)) {
|
|
||||||
this.use(lunr[language])
|
|
||||||
}
|
|
||||||
|
|
||||||
// By default Lunr considers the `-` character to be a word boundary.
|
|
||||||
// This allows hyphens to be included in the search index.
|
|
||||||
// If you change this, remember to make it match the indexing separator
|
|
||||||
// in lib/search/lunr-search.js so the query is tokenized
|
|
||||||
// identically to the way it was indexed.
|
|
||||||
this.tokenizer.separator = /[\s]+/
|
|
||||||
|
|
||||||
this.ref('objectID')
|
|
||||||
this.field('url')
|
|
||||||
this.field('breadcrumbs')
|
|
||||||
this.field('headings', { boost: 3 })
|
|
||||||
this.field('title', { boost: 5 })
|
|
||||||
this.field('content')
|
|
||||||
this.field('topics')
|
|
||||||
|
|
||||||
this.metadataWhitelist = ['position']
|
|
||||||
|
|
||||||
for (const record of records) {
|
|
||||||
this.add(record)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
toJSON() {
|
|
||||||
this.build()
|
|
||||||
return JSON.stringify(this.index, null, 2)
|
|
||||||
}
|
|
||||||
|
|
||||||
get recordsObject() {
|
|
||||||
return Object.fromEntries(this.records.map((record) => [record.objectID, record]))
|
|
||||||
}
|
|
||||||
|
|
||||||
async write({
|
|
||||||
outDirectory = path.posix.join(__dirname, '../../lib/search/indexes'),
|
|
||||||
compressFiles = true,
|
|
||||||
}) {
|
|
||||||
this.build()
|
|
||||||
|
|
||||||
// Write the parsed records
|
|
||||||
await Promise.resolve(this.recordsObject)
|
|
||||||
.then(JSON.stringify)
|
|
||||||
.then((str) => (compressFiles ? compress(str) : str))
|
|
||||||
.then((content) =>
|
|
||||||
fs.writeFile(
|
|
||||||
path.join(
|
|
||||||
outDirectory,
|
|
||||||
compressFiles ? `${this.name}-records.json.br` : `${this.name}-records.json`
|
|
||||||
),
|
|
||||||
content
|
|
||||||
// Do not set to 'utf8'
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
// Write the index
|
|
||||||
await Promise.resolve(this.index)
|
|
||||||
.then(JSON.stringify)
|
|
||||||
.then((str) => (compressFiles ? compress(str) : str))
|
|
||||||
.then((content) =>
|
|
||||||
fs.writeFile(
|
|
||||||
path.join(outDirectory, compressFiles ? `${this.name}.json.br` : `${this.name}.json`),
|
|
||||||
content
|
|
||||||
// Do not set to 'utf8'
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,32 +1,17 @@
|
|||||||
#!/usr/bin/env node
|
#!/usr/bin/env node
|
||||||
import { fileURLToPath } from 'url'
|
|
||||||
import path from 'path'
|
import path from 'path'
|
||||||
import fs from 'fs/promises'
|
import fs from 'fs/promises'
|
||||||
|
|
||||||
import validateRecords from './validate-records.js'
|
import validateRecords from './validate-records.js'
|
||||||
import { compress } from '../../lib/search/compress.js'
|
|
||||||
|
|
||||||
const __dirname = path.dirname(fileURLToPath(import.meta.url))
|
export async function writeIndexRecords(name, records, outDirectory) {
|
||||||
|
|
||||||
export async function writeIndexRecords(
|
|
||||||
name,
|
|
||||||
records,
|
|
||||||
{
|
|
||||||
outDirectory = path.posix.join(__dirname, '../../lib/search/indexes'),
|
|
||||||
compressFiles = true,
|
|
||||||
prettyPrint = false,
|
|
||||||
}
|
|
||||||
) {
|
|
||||||
validateRecords(name, records)
|
validateRecords(name, records)
|
||||||
|
|
||||||
const recordsObject = Object.fromEntries(records.map((record) => [record.objectID, record]))
|
const recordsObject = Object.fromEntries(records.map((record) => [record.objectID, record]))
|
||||||
const content = JSON.stringify(recordsObject, undefined, prettyPrint ? 2 : 0)
|
const content = JSON.stringify(recordsObject, undefined, 0)
|
||||||
|
|
||||||
const filePath = path.join(
|
const filePath = path.join(outDirectory, `${name}-records.json`)
|
||||||
outDirectory,
|
await fs.writeFile(filePath, content)
|
||||||
compressFiles ? `${name}-records.json.br` : `${name}-records.json`
|
|
||||||
)
|
|
||||||
await fs.writeFile(filePath, compressFiles ? await compress(content) : content)
|
|
||||||
|
|
||||||
return filePath
|
return filePath
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,206 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"query": "interactions",
|
|
||||||
"href": "/rest/reference/interactions"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "repositories",
|
|
||||||
"href": "/rest/reference/repos"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "workflow_run",
|
|
||||||
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "workflow_dispatch",
|
|
||||||
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "pull_request",
|
|
||||||
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "workflow_run",
|
|
||||||
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "workflow_dispatch",
|
|
||||||
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "register for an account",
|
|
||||||
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "registering on GitHub",
|
|
||||||
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "signing up for a GitHub account",
|
|
||||||
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "new account",
|
|
||||||
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "create a GitHub account",
|
|
||||||
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "apis",
|
|
||||||
"href": "/graphql"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "apis",
|
|
||||||
"href": "/rest"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "api",
|
|
||||||
"href": "/graphql"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "api",
|
|
||||||
"href": "/rest"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "create a new branch",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "fix merge conflict",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "merge conflict",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "branch conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "conflicting files",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "resolve conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "fix merge conflict",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "merge conflict",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "branch conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "conflicting files",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "resolve conflicts",
|
|
||||||
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "actions billable minutes",
|
|
||||||
"href": "/billing/managing-billing-for-github-actions/about-billing-for-github-actions"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "actions trigger pull requests",
|
|
||||||
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "about teams",
|
|
||||||
"href": "/organizations/organizing-members-into-teams/about-teams"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "about organizations",
|
|
||||||
"href": "/organizations/collaborating-with-groups-in-organizations/about-organizations"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "create pages site",
|
|
||||||
"href": "/pages/getting-started-with-github-pages/creating-a-github-pages-site"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "create pages site",
|
|
||||||
"href": "/pages/setting-up-a-github-pages-site-with-jekyll/creating-a-github-pages-site-with-jekyll"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "make a team",
|
|
||||||
"href": "/organizations/organizing-members-into-teams/creating-a-team"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "new team",
|
|
||||||
"href": "/organizations/organizing-members-into-teams/creating-a-team"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "team",
|
|
||||||
"href": "/organizations/organizing-members-into-teams/about-teams"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "rest create issue",
|
|
||||||
"href": "/rest/reference/issues"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "fork",
|
|
||||||
"href": "/rest/reference/repos"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "commit email",
|
|
||||||
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-email-preferences/setting-your-commit-email-address"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "graphql organization",
|
|
||||||
"href": "/graphql/reference/objects"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "device flow",
|
|
||||||
"href": "/developers/apps/building-oauth-apps/authorizing-oauth-apps"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "convert user",
|
|
||||||
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-user-account-settings/converting-a-user-into-an-organization"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "add email",
|
|
||||||
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-email-preferences/adding-an-email-address-to-your-github-account"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "transfer ownership",
|
|
||||||
"href": "/organizations/managing-organization-settings/transferring-organization-ownership"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "merge accounts",
|
|
||||||
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-user-account-settings/merging-multiple-user-accounts"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "search syntax",
|
|
||||||
"href": "/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "scim okta",
|
|
||||||
"href": "/organizations/managing-saml-single-sign-on-for-your-organization/configuring-saml-single-sign-on-and-scim-using-okta"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "keeping your account and data secure",
|
|
||||||
"href": "/authentication/keeping-your-account-and-data-secure"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"query": "ssh troubleshoot",
|
|
||||||
"href": "/authentication/troubleshooting-ssh"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
#!/usr/bin/env node
|
|
||||||
|
|
||||||
// [start-readme]
|
|
||||||
//
|
|
||||||
// This script is a quality assurance test for the Lunr search configuration.
|
|
||||||
// This test runs example queries and expects a specific page to land in the top
|
|
||||||
// 3 results.
|
|
||||||
//
|
|
||||||
// The data source used by this script is a JSON file `script/search/search-qa-data.json`,
|
|
||||||
// which is populated from spreadsheet data here:
|
|
||||||
// https://docs.google.com/spreadsheets/d/1Dt5JRVcmyAGWKBwGjwmXxi7Ww_vdfYLfZ-EFpu2S2CQ/edit?usp=sharing
|
|
||||||
//
|
|
||||||
// [end-readme]
|
|
||||||
|
|
||||||
import loadLunrResults from '../../lib/search/lunr-search.js'
|
|
||||||
import { readFileSync } from 'fs'
|
|
||||||
import { join } from 'path'
|
|
||||||
|
|
||||||
// QA fixture: the parsed contents of script/search/search-qa-data.json.
// The path is joined onto process.cwd(), so it is resolved against the
// directory the script is started from, not against this file's location.
const queryData = JSON.parse(readFileSync(join(process.cwd(), 'script/search/search-qa-data.json')))
|
|
||||||
|
|
||||||
// `version` and `language` are passed unchanged to every loadLunrResults call in main().
const version = 'dotcom'
|
|
||||||
const language = 'en'
|
|
||||||
// Number of results requested per query. main() also stores this value as the
// rank of a query whose expected href was not found among the results.
const limit = 10
|
|
||||||
// Ranks strictly below this value (other than rank 0) are reported as a "top" hit.
const TOP_RANK = 3
|
|
||||||
|
|
||||||
// Start the run. The promise returned by main() is not awaited or given a handler here.
main()
|
|
||||||
|
|
||||||
/**
 * Run every QA query against the search backend and report how highly the
 * expected page ranked.
 *
 * For each `{ query, href }` entry in `queryData`, this calls
 * `loadLunrResults` with the module-level `version`, `language` and `limit`,
 * strips the leading language segment from each result URL, and records the
 * zero-based position of `href` in the results. The collected ranks are then
 * handed to `logResults`.
 *
 * A query whose lookup throws is logged with `console.error` and left out of
 * the collected ranks; the remaining queries still run.
 *
 * @returns {Promise<void>} settles once the report has been produced
 */
async function main() {
  const rankResults = []

  // Only a leading `/<language>` path segment is removed. An unanchored
  // replace would also mangle a URL such as `/enterprise/...` that merely
  // starts with the same letters.
  const languagePrefix = `/${language}`
  const stripLanguage = (url) =>
    url === languagePrefix || url.startsWith(`${languagePrefix}/`)
      ? url.slice(languagePrefix.length)
      : url

  // for...of walks the array's elements; for...in would walk its enumerable
  // keys (as strings), including anything added to the prototype chain.
  for (const { query, href } of queryData) {
    try {
      const results = await loadLunrResults({
        version,
        language,
        query,
        limit,
      })

      const position = results.map((result) => stripLanguage(result.url)).indexOf(href)

      // A miss is stored as `limit` so that misses sort after every real rank.
      const rank = position === -1 ? limit : position
      rankResults.push({ query, href, rank })
    } catch (err) {
      // Best effort: report the failure and carry on with the next query.
      console.error(err)
    }
  }

  // logResults is async; await it so a failure inside it rejects main()
  // instead of becoming a detached promise.
  await logResults(rankResults)
}
|
|
||||||
|
|
||||||
/**
 * Print one line per QA query, ordered from best rank to worst, followed by a
 * percentage summary for each bucket.
 *
 * Buckets (`limit` and `TOP_RANK` are module-level constants):
 *   - miss:  rank === limit (the value stored when the expected href was absent)
 *   - first: rank === 0
 *   - top:   rank < TOP_RANK
 *   - low:   every other rank
 *
 * Percentages are computed against `queryData.length`, i.e. the number of
 * queries in the fixture rather than the number of entries in `results`.
 *
 * @param {Array<{query: string, href: string, rank: number}>} results
 *   Ranked outcomes; the array itself is not modified.
 * @returns {Promise<void>}
 */
async function logResults(results) {
  // Array.prototype.sort works in place, so sort a copy and leave the
  // caller's array in the order it was built.
  const ranked = [...results].sort((a, b) => a.rank - b.rank)

  const counts = { first: 0, top: 0, low: 0, miss: 0 }

  for (const { query, href, rank } of ranked) {
    // The miss check comes first because `limit` is the stored rank of a miss.
    if (rank === limit) {
      counts.miss++
      console.log(`🔴 query: ${query} - Expected href: ${href}\n`)
    } else if (rank === 0) {
      counts.first++
      console.log(`⭐ Query: ${query} - Expected href: ${href}`)
    } else if (rank < TOP_RANK) {
      counts.top++
      console.log(`🟢 Query: ${query} - Expected href: ${href}`)
    } else {
      counts.low++
      console.log(`🟡 Query: ${query} - Expected href: ${href}`)
    }
  }

  // With an empty fixture 0 / 0 would print "NaN%"; report 0.0% instead.
  const total = queryData.length
  const percentOf = (count) => (total === 0 ? 0 : (count / total) * 100).toFixed(1)

  console.log(`\n⭐ First hit ${percentOf(counts.first)}%`)
  console.log(`\n🟢 Top ${TOP_RANK} hit ${percentOf(counts.top)}%`)
  console.log(`\n🟡 Top ${limit} hit ${percentOf(counts.low)}%`)
  console.log(`\n🔴 Miss ${percentOf(counts.miss)}%`)
}
|
|
||||||
@@ -8,8 +8,6 @@
|
|||||||
// [end-readme]
|
// [end-readme]
|
||||||
|
|
||||||
import assert from 'assert'
|
import assert from 'assert'
|
||||||
import path from 'path'
|
|
||||||
|
|
||||||
import { program, Option } from 'commander'
|
import { program, Option } from 'commander'
|
||||||
|
|
||||||
import { languageKeys } from '../../lib/languages.js'
|
import { languageKeys } from '../../lib/languages.js'
|
||||||
@@ -27,10 +25,8 @@ const shortNames = Object.fromEntries(
|
|||||||
|
|
||||||
const allVersionKeys = [...Object.keys(shortNames), ...Object.keys(allVersions)]
|
const allVersionKeys = [...Object.keys(shortNames), ...Object.keys(allVersions)]
|
||||||
|
|
||||||
const DEFAULT_OUT_DIRECTORY = path.join('lib', 'search', 'indexes')
|
|
||||||
|
|
||||||
program
|
program
|
||||||
.description('Creates search records (and Lunr indexes) by scraping')
|
.description('Creates search records by scraping')
|
||||||
.option('-v, --verbose', 'Verbose outputs')
|
.option('-v, --verbose', 'Verbose outputs')
|
||||||
.addOption(new Option('-V, --version <VERSION>', 'Specific versions').choices(allVersionKeys))
|
.addOption(new Option('-V, --version <VERSION>', 'Specific versions').choices(allVersionKeys))
|
||||||
.addOption(
|
.addOption(
|
||||||
@@ -39,23 +35,14 @@ program
|
|||||||
.addOption(
|
.addOption(
|
||||||
new Option('--not-language <LANGUAGE>', 'Specific language to omit').choices(languageKeys)
|
new Option('--not-language <LANGUAGE>', 'Specific language to omit').choices(languageKeys)
|
||||||
)
|
)
|
||||||
.option('-d, --dry-run', 'Does not write to disk')
|
|
||||||
.option(
|
|
||||||
'-o, --out-directory <DIRECTORY>',
|
|
||||||
`Where to dump the created files (default ${DEFAULT_OUT_DIRECTORY})`
|
|
||||||
)
|
|
||||||
.option('--no-compression', `Do not Brotli compress the created .json files (default false)`)
|
|
||||||
// Once we've fully removed all Lunr indexing code, we can remove this option
|
|
||||||
// and change where it's used to be that the default is to not generate
|
|
||||||
// any Lunr indexes.
|
|
||||||
.option('--no-lunr-index', `Do not generate a Lunr index, just the records file (default false)`)
|
|
||||||
.option('--no-markers', 'Do not print a marker for each parsed document')
|
.option('--no-markers', 'Do not print a marker for each parsed document')
|
||||||
.option('--filter <MATCH>', 'Filter to only do pages that match this string')
|
.option('--filter <MATCH>', 'Filter to only do pages that match this string')
|
||||||
|
.argument('<out-directory>', 'where the indexable files should be written')
|
||||||
.parse(process.argv)
|
.parse(process.argv)
|
||||||
|
|
||||||
main(program.opts())
|
main(program.opts(), program.args)
|
||||||
|
|
||||||
async function main(opts) {
|
async function main(opts, args) {
|
||||||
let language
|
let language
|
||||||
if ('language' in opts) {
|
if ('language' in opts) {
|
||||||
language = opts.language
|
language = opts.language
|
||||||
@@ -117,18 +104,7 @@ async function main(opts) {
|
|||||||
`version must be undefined or one of ${Object.keys(allVersions)}`
|
`version must be undefined or one of ${Object.keys(allVersions)}`
|
||||||
)
|
)
|
||||||
|
|
||||||
let dryRun = false
|
const [outDirectory] = args
|
||||||
if ('dryRun' in opts) {
|
|
||||||
dryRun = opts.dryRun
|
|
||||||
} else {
|
|
||||||
dryRun = Boolean(JSON.parse(process.env.DRY_RUN || 'false'))
|
|
||||||
}
|
|
||||||
|
|
||||||
const outDirectory = opts.outDirectory || DEFAULT_OUT_DIRECTORY
|
|
||||||
|
|
||||||
const compressFiles = !!opts.compression
|
|
||||||
|
|
||||||
const generateLunrIndex = !!opts.lunrIndex
|
|
||||||
|
|
||||||
const config = {
|
const config = {
|
||||||
noMarkers: !opts.markers,
|
noMarkers: !opts.markers,
|
||||||
@@ -136,13 +112,10 @@ async function main(opts) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const options = {
|
const options = {
|
||||||
dryRun,
|
|
||||||
language,
|
language,
|
||||||
notLanguage,
|
notLanguage,
|
||||||
version: indexVersion,
|
version: indexVersion,
|
||||||
outDirectory,
|
outDirectory,
|
||||||
compressFiles,
|
|
||||||
generateLunrIndex,
|
|
||||||
config,
|
config,
|
||||||
}
|
}
|
||||||
await searchSync(options)
|
await searchSync(options)
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import buildRecords from './build-records.js'
|
|||||||
import findIndexablePages from './find-indexable-pages.js'
|
import findIndexablePages from './find-indexable-pages.js'
|
||||||
import { allVersions } from '../../lib/all-versions.js'
|
import { allVersions } from '../../lib/all-versions.js'
|
||||||
import { namePrefix } from '../../lib/search/config.js'
|
import { namePrefix } from '../../lib/search/config.js'
|
||||||
import LunrIndex from './lunr-search-index.js'
|
|
||||||
import { writeIndexRecords } from './search-index-records.js'
|
import { writeIndexRecords } from './search-index-records.js'
|
||||||
|
|
||||||
// Build a search data file for every combination of product version and language
|
// Build a search data file for every combination of product version and language
|
||||||
@@ -14,11 +13,8 @@ import { writeIndexRecords } from './search-index-records.js'
|
|||||||
export default async function syncSearchIndexes({
|
export default async function syncSearchIndexes({
|
||||||
language,
|
language,
|
||||||
version,
|
version,
|
||||||
dryRun,
|
|
||||||
notLanguage,
|
notLanguage,
|
||||||
outDirectory,
|
outDirectory,
|
||||||
compressFiles,
|
|
||||||
generateLunrIndex,
|
|
||||||
config = {},
|
config = {},
|
||||||
}) {
|
}) {
|
||||||
const t0 = new Date()
|
const t0 = new Date()
|
||||||
@@ -76,22 +72,10 @@ export default async function syncSearchIndexes({
|
|||||||
redirects,
|
redirects,
|
||||||
config
|
config
|
||||||
)
|
)
|
||||||
if (generateLunrIndex) {
|
const fileWritten = await writeIndexRecords(indexName, records, outDirectory)
|
||||||
const index = new LunrIndex(indexName, records)
|
|
||||||
|
|
||||||
if (!dryRun) {
|
|
||||||
await index.write({ outDirectory, compressFiles })
|
|
||||||
console.log('wrote index to file: ', indexName)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const fileWritten = await writeIndexRecords(indexName, records, {
|
|
||||||
outDirectory,
|
|
||||||
compressFiles,
|
|
||||||
})
|
|
||||||
console.log(`wrote records to ${fileWritten}`)
|
console.log(`wrote records to ${fileWritten}`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
const t1 = new Date()
|
const t1 = new Date()
|
||||||
const tookSec = (t1.getTime() - t0.getTime()) / 1000
|
const tookSec = (t1.getTime() - t0.getTime()) / 1000
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user