
remove lunr from codebase (#32432)

Rachael Sewell
2022-11-10 13:36:13 -08:00
committed by GitHub
parent 8c357dcec1
commit 152a2399e2
17 changed files with 57 additions and 1383 deletions

View File

@@ -1,119 +0,0 @@
name: Dry run Elasticsearch indexing
# **What it does**: Tests indexing records into a local Elasticsearch.
# **Why we have it**: To make sure the indexing code works.
# **Who does it impact**: Docs engineering.
on:
merge_group:
pull_request:
paths:
- 'script/search/**'
- 'package*.json'
- .github/workflows/dry-run-elasticsearch-indexing.yml
permissions:
contents: read
jobs:
dry-run-elasticsearch-indexing:
# Avoid github/docs and forks of it
if: github.repository == 'github/docs-internal'
runs-on: ubuntu-20.04-xl
steps:
- uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
with:
elasticsearch version: '8.2.0'
host port: 9200
container port: 9200
host node port: 9300
node port: 9300
discovery type: 'single-node'
- name: Checkout
uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
with:
lfs: 'true'
- name: Check out LFS objects
run: git lfs checkout
- name: Setup node
uses: actions/setup-node@1f8c6b94b26d0feae1e387ca63ccbdc44d27b561
with:
node-version: 16.15.x
cache: npm
- name: Install dependencies
run: npm ci
- name: Cache nextjs build
uses: actions/cache@48af2dc4a9e8278b89d7fa154b955c30c6aaab09
with:
path: .next/cache
key: ${{ runner.os }}-nextjs-${{ hashFiles('package*.json') }}
- name: Run build scripts
run: npm run build
- name: Start the server in the background
env:
ENABLE_DEV_LOGGING: false
run: |
npm run sync-search-server > /tmp/stdout.log 2> /tmp/stderr.log &
# first sleep to give it a chance to start
sleep 6
curl --retry-connrefused --retry 4 -I http://localhost:4002/
- if: ${{ failure() }}
name: Debug server outputs on errors
run: |
echo "____STDOUT____"
cat /tmp/stdout.log
echo "____STDERR____"
cat /tmp/stderr.log
- name: Scrape records into a temp directory
env:
# If a reusable, or anything in the `data/*` directory, is deleted
# you might get a
#
# RenderError: Can't find the key 'site.data.reusables...' in the scope
#
# But that'll get fixed in the next translation pipeline. For now,
# let's just accept an empty string instead.
THROW_ON_EMPTY: false
run: |
mkdir /tmp/records
npm run sync-search-indices -- \
--language en \
--version dotcom \
--out-directory /tmp/records \
--no-compression --no-lunr-index
ls -lh /tmp/records
# Serves two purposes:
# 1. Be confident that the Elasticsearch server start-up worked at all
# 2. Sometimes Elasticsearch will bind to the port but still not
# technically be ready. By using `curl --retry` we can know it's
# also genuinely ready to use.
- name: Ping Elasticsearch
run: curl --retry-connrefused --retry 5 -I http://localhost:9200/
- name: Index some
env:
ELASTICSEARCH_URL: 'http://localhost:9200'
run: |
./script/search/index-elasticsearch.js --verbose \
-l en \
-V dotcom -- /tmp/records
- name: Show created indexes and aliases
run: |
curl http://localhost:9200/_cat/indices?v
curl http://localhost:9200/_cat/aliases?v

View File

@@ -101,8 +101,7 @@ jobs:
           mkdir /tmp/records
           npm run sync-search-indices -- \
             --language ${{ matrix.language }} \
-            --out-directory /tmp/records \
-            --no-compression --no-lunr-index
+            /tmp/records
           ls -lh /tmp/records
@@ -118,7 +117,8 @@ jobs:
           VERSION: ${{ github.event.inputs.version }}
         run: |
           ./script/search/index-elasticsearch.js \
-            --language ${{ matrix.language }} -- /tmp/records
+            --language ${{ matrix.language }} \
+            /tmp/records
       - name: Check created indexes and aliases
         run: |

View File

@@ -8,8 +8,8 @@ name: Sync search - PR
 on:
   pull_request:
     paths:
-      - script/search/parse-page-sections-into-records.js
-      - script/search/popular-pages.js
+      - 'script/search/**'
+      - 'package*.json'
       - lib/search/popular-pages.json
       # Ultimately, for debugging this workflow itself
       - .github/workflows/sync-search-pr.yml
@@ -98,8 +98,7 @@ jobs:
           npm run sync-search-indices -- \
             --language en \
             --version dotcom \
-            --out-directory /tmp/records \
-            --no-compression --no-lunr-index
+            /tmp/records
           ls -lh /tmp/records
@@ -111,7 +110,8 @@ jobs:
         run: |
           ./script/search/index-elasticsearch.js \
             --language en \
-            --version dotcom -- /tmp/records
+            --version dotcom \
+            /tmp/records
       - name: Check created indexes and aliases
         run: |

View File

@@ -45,12 +45,6 @@ The Actions workflow progress can be viewed (by GitHub employees) in the [Action
 You can manually run the workflow to generate the indexes after you push your changes to `main` to speed up the indexing when needed. It's recommended to do this for only the `free-pro-team@latest` version and the `en` language because running all languages and versions takes about 40 minutes. To run it manually, click the "Run workflow" button in the [Actions tab](https://github.com/github/docs-internal/actions/workflows/sync-search-indices.yml). Enter the language and version you'd like to generate the indexes for as inputs to the workflow. By default, all languages and versions are generated.
-## Generating search indexes for your local checkout
-You can locally generate search indexes, but please do not check them into your local branch because they can get out-of-sync with the `main` branch quickly.
-To generate the English version of the Dotcom search index locally, run `LANGUAGE=en VERSION=free-pro-team@latest npm run sync-search`. See [Build and sync](#build-and-sync) below for more details. To revert those files run `git checkout lib/search/indexes`.
 ### Build and sync
 To build all the indices (this takes about an hour):

View File

@@ -1,24 +0,0 @@
import { promisify } from 'util'
import zlib from 'zlib'
const brotliCompress = promisify(zlib.brotliCompress)
const brotliDecompress = promisify(zlib.brotliDecompress)
const options = {
params: {
[zlib.constants.BROTLI_PARAM_MODE]: zlib.constants.BROTLI_MODE_TEXT,
[zlib.constants.BROTLI_PARAM_QUALITY]: 6,
},
}
export async function compress(data) {
return brotliCompress(data, options)
}
export async function decompress(data) {
return brotliDecompress(data, options)
}
export default {
compress,
decompress,
}
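For reference, the removed helper was used roughly like this; a minimal round-trip sketch (the payload string here is hypothetical, real callers passed JSON-serialized records and indexes):
import { compress, decompress } from '../../lib/search/compress.js'
// Hypothetical payload
const original = JSON.stringify({ hello: 'world' })
const compressed = await compress(original) // Brotli-compressed Buffer
const restored = (await decompress(compressed)).toString() // back to the original string
console.assert(restored === original)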

View File

@@ -1,401 +0,0 @@
import { fileURLToPath } from 'url'
import path from 'path'
import lunr from 'lunr'
import fs from 'fs/promises'
import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js'
import tinyseg from 'lunr-languages/tinyseg.js'
import lunrJa from 'lunr-languages/lunr.ja.js'
import lunrEs from 'lunr-languages/lunr.es.js'
import lunrPt from 'lunr-languages/lunr.pt.js'
import { get } from 'lodash-es'
import statsd from '../statsd.js'
import { namePrefix } from './config.js'
import { decompress } from './compress.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
// By default Lunr considers the `-` character to be a word boundary.
// This allows hyphens to be included in the query.
// If you change this, remember to make it match the indexing separator
// in script/search/lunr-search-index.js so the query is tokenized
// identically to the way it was indexed.
lunr.QueryLexer.termSeparator = /[\s]+/
lunrStemmerSupport(lunr)
tinyseg(lunr)
lunrJa(lunr)
lunrEs(lunr)
lunrPt(lunr)
const LUNR_DIR = './indexes'
const lunrIndexes = new Map()
const lunrRecords = new Map()
// Max size of the `.content` record included in the JSON payload that the
// middleware server will serve.
// The reason we're worrying about that here and not in the middleware
// is because what we're *ultimately* sending is HTML so we can't let
// the consumer of this module slice it as a regular string because
// they might cut off an HTML tag in the middle.
// As of Oct 2021, with the way the CSS works inside components/Search.tsx
// roughly 450-650 characters are contained. Let's just make sure we're
// well within limit. So no visual difference, but smaller JSON payloads.
const MAX_CONTENT_LENGTH = 1000
export class QueryTermError extends Error {}
export class QueryPrefixError extends QueryTermError {}
export default async function loadLunrResults({ version, language, query, limit }) {
const indexName = `${namePrefix}-${version}-${language}`
if (!lunrIndexes.has(indexName) || !lunrRecords.has(indexName)) {
lunrIndexes.set(indexName, await loadLunrIndex(indexName))
lunrRecords.set(indexName, await loadLunrRecords(indexName))
statsd.increment('middleware.lunr_cold_index', 1, [`index:${indexName}`])
statsd.gauge('memory_heap_used', process.memoryUsage().heapUsed, ['event:lunr-index'])
}
const index = lunrIndexes.get(indexName)
const records = lunrRecords.get(indexName)
const queryLength = query.trim().length
for (const word of query.trim().split(/\s+/g) || []) {
// By splitting up the query into words, we can use ^ at the start
// of the regex. That avoids "Polynomial regular expression used on
// uncontrolled data" warning because the regex can be evalulated
// from left to right quickly.
for (const match of word.matchAll(/^(\w+):/g)) {
const validPrefixes = ['topics', 'title']
if (!validPrefixes.includes(match[1])) {
throw new QueryPrefixError(
`'${match[1]}' is not a valid prefix keyword. Must be one of (${validPrefixes})`
)
}
}
}
// A search result's /combined/ score is:
//
// normalizedScore + POPULARITY_FACTOR * record.popularity
//
// where the "normalizedScore" is the ratio of its Lunr score divided
// by the highest score of all found in Lunr. That means, that the record
// Lunr thinks matches the most becomes 1.0.
//
// It's the number we sort on. The `record.popularity` is always a
// number between (and including) 0-1.
// If the Lunr score is, say, 5.0 and the popularity is 0.1, and
// the POPULARITY_FACTOR is 10, the combined score is 5.0 + 10 * 0.1 = 6.0
// If you make this too large, the Lunr score becomes insignificant and
// any single match anywhere will always favor the popular documents.
// The best way to adjust this number is to get a feeling for what
// kinds of Lunr score numbers we're usually getting and adjust
// accordingly.
// Short queries are bound to be very ambiguous and the more ambiguous
// the more relevant the popularity is.
const POPULARITY_FACTOR = queryLength <= 2 ? 25 : queryLength <= 6 ? 10 : 5
// This number determines how much more we favor the title search first.
// It's a multiplier. We do 2 searches: one on title, one on all other fields.
// Then, we compare all scores. But the scores in the results from the title
// we multiply that with this number.
// The effect is that we favor matches in the title more than we favor
// matches that were not in the title.
// If you search for 'foobar' and it appears in the title of one
// not-so-popular record, but also appears in the content of a
// very popular record, you want to give the title-matching one a
// leg up.
// Note that the Lunr scores from the content is usually much higher
// than scores on the title. E.g. the word `codespaces` might appear
// 10 times on a page that is actually about something else. If there's
// a record whose title includes `codespaces` it might get a very low
// Lunr score but since title matches are generally "better", we
// want to make sure this number accounts for that.
const TITLE_FIRST = queryLength <= 2 ? 45 : queryLength <= 6 ? 25 : 10
// Multiplication bonus given to matches that were made on the
// search where ALL tokens are required.
// E.g. you search for 'foo bar' and we have three records:
//
// A) "This foo is very special"
// B) "With bar and foo you can't go wrong"
// C) "Only bar can save you"
//
// What will happen is that it only finds record (B) when it's
// required to match both 'foo' *and* 'bar'. So you get these scores:
//
// A) score = result.score + popularity
// B) score = MATCH_PHRASE * (result.score + popularity)
// C) score = result.score + popularity
//
// So it's a very powerful multiplier. But that's fine because a
// "phrase match" is a very accurate thing.
const MATCH_PHRASE = 5
// Imagine that we have 1,000 documents. 100 of them contain the word
// 'foobar'. Of those 100, we want to display the top 10 "best".
// But if we only do `lunrindex.search('foobar').slice(0, 10)` we
// would slice prematurely. Instead, we do
// `lunrindex.search('foobar').slice(0, 100)` first, sort those,
// and in the final step, after any custom sorting, we `.slice(0, 10)`.
// This number decides how many to extract from Lunr in the first place
// that we're going to do our custom sorting on.
// This number can be allowed to be pretty big because we're only ever
// going to do the more time-consuming highlighting on the `limit`
// records that we finally return.
const PRE_LIMIT = 500
const titleQuery = query.trim()
let highestTitleScore = 0.0
const andTitleResults = []
// This will turn something like 'foo and bar' into:
// [
// { str: 'foo', metadata: { position: [Array], index: 0 } },
// { str: 'bar', metadata: { position: [Array], index: 1 } }
// ]
// Note how the stopword gets omitted.
// It's important to omit the stopwords because even if the record
// actually contains the stopword, it won't match then.
// E.g. you have a record called "Foo And Bar" and you search for
// {foo AND and AND bar} it will actually not find anything.
// But if you change it to {foo AND bar} it will match "Foo And Bar"
// Same goes if any other stopwords were used like "Foo the Bar with for a".
// That also needs to become an AND-search of {foo AND bar} ...only.
const titleQueryTokenized = lunr.tokenizer(titleQuery).filter(lunr.stopWordFilter)
if (titleQueryTokenized.length > 1) {
andTitleResults.push(
...index
.query((q) => {
for (const { str } of titleQueryTokenized) {
q.term(str, { fields: ['title'], presence: lunr.Query.presence.REQUIRED })
}
})
.slice(0, PRE_LIMIT)
.map((result) => {
const { popularity } = records[result.ref]
if (result.score > highestTitleScore) {
highestTitleScore = result.score
}
const score = result.score / highestTitleScore
return {
result,
_score: MATCH_PHRASE * TITLE_FIRST * (score + POPULARITY_FACTOR * (popularity || 0.0)),
}
})
)
}
const titleResults = index
.query((q) => {
// The objective is to create an OR-query specifically for the 'title'
// because *we* value matches on that much higher than any other
// field in our records.
// But we want to make sure that the last word is always treated
// like a forward-tokenized token. I.e. if you typed "google ku" it
// becomes a search for "google ku*".
// Note that it's important to use the `lunr.tokenizer()` function when
// using the `index.query()` function because, for starters, it will
// normalize the input.
// The `index.search()` function is a higher abstraction that basically
// does this:
// (pseudo code)
//
// Index.prototype.search = function(input) {
// lunr.tokenize(input).forEach(token => {
// Index.query(callback => {
// callback(token)
// })
// })
// }
//
// If we didn't use the tokenized form, we'd get different results
// for searching for "SSH agent" and "ssh AgenT" for example.
titleQueryTokenized.forEach(({ str }, i) => {
const isLastToken = i === titleQueryTokenized.length - 1
const isShort = str.length <= 3
q.term(str, {
fields: ['title'],
wildcard:
isLastToken && isShort ? lunr.Query.wildcard.TRAILING : lunr.Query.wildcard.NONE,
})
})
})
.slice(0, PRE_LIMIT)
.map((result) => {
const { popularity } = records[result.ref]
if (result.score > highestTitleScore) {
highestTitleScore = result.score
}
const score = result.score / highestTitleScore
return {
result,
_score: TITLE_FIRST * (score + POPULARITY_FACTOR * (popularity || 0.0)),
}
})
let allQuery = query.trim()
// Unfortunately, Lunr currently doesn't support phrase matching
// so you always end up with 0 results if you search for `"foo bar"`.
// In this case it's better to do a search for `foo` and `bar`.
if (
allQuery.startsWith('"') &&
allQuery.endsWith('"') &&
(allQuery.match(/"/g) || []).length === 2
) {
allQuery = allQuery.slice(1, -1)
}
let highestAllScore = 0.0
const allResults = index
.search(allQuery)
.slice(0, PRE_LIMIT)
.map((result) => {
const { popularity } = records[result.ref]
if (result.score > highestAllScore) {
highestAllScore = result.score
}
const score = result.score / highestAllScore
return {
result,
score,
_score: score + POPULARITY_FACTOR * (popularity || 0.0),
}
})
const _unique = new Set()
const combinedMatchData = {}
const results = []
for (const matches of [andTitleResults, titleResults, allResults]) {
for (const match of matches) {
const { result } = match
// We need to loop over all results (both from title searches and
// from all-field searches) but we can only keep one.
// But before we do that filtering (i.e. omitting previously kept results)
// we need to merge all the matchData from each result.
// That's because the `result.matchData` from the title search
// will have Lunr match positions for 'title' but the `result.matchData`
// from the all-field search, will have positions for other things
// such as 'content' and 'breadcrumbs'.
combinedMatchData[result.ref] = Object.assign(
combinedMatchData[result.ref] || {},
result.matchData
)
if (_unique.has(result.ref)) continue
_unique.add(result.ref)
results.push(match)
}
}
// Highest score first
results.sort((a, b) => b._score - a._score)
// We might have found much more than `limit` number of matches and we've
// taken them all out for our custom sorting. Now, once that's done,
// of the ones we're going to return we apply the highlighting.
// The reasoning is that the highlighting work isn't free and it'd
// be a waste to do it on results we're not going to return anyway.
return results.slice(0, limit).map(({ result }) => {
const record = records[result.ref]
const matchData = combinedMatchData[result.ref]
return {
url: result.ref,
breadcrumbs: field(matchData, record, 'breadcrumbs'),
title: field(matchData, record, 'title'),
content: smartSlice(field(matchData, record, 'content'), MAX_CONTENT_LENGTH),
// don't highlight the topics array
topics: record.topics,
score: result.score,
popularity: record.popularity || 0.0,
}
})
}
async function loadLunrIndex(indexName) {
const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}.json.br`)
// Do not set to 'utf8' on file reads
return fs.readFile(filePath).then(decompress).then(JSON.parse).then(lunr.Index.load)
}
async function loadLunrRecords(indexName) {
const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}-records.json.br`)
// Do not set to 'utf8' on file reads
return fs.readFile(filePath).then(decompress).then(JSON.parse)
}
// Highlight a match within an attribute field
function field(matchData, record, name) {
const text = record[name]
if (!text) return text
// First, get a list of all the positions of the matching tokens
const positions = Object.values(matchData.metadata)
.map((fields) => get(fields, [name, 'position']))
.filter(Boolean)
.flat()
.sort((a, b) => a[0] - b[0])
.map(([start, length]) => [start, start + length])
.map(([start, end], i, a) => [i && a[i - 1][1], start, end])
// If this field has no token matches, no highlighting
if (!positions.length) return text
// Highlight the text
const highlighted = positions
.map(([prev, start, end], i) => [
text.slice(prev, start),
mark(text.slice(start, end)),
i === positions.length - 1 && text.slice(end),
])
.flat()
.filter(Boolean)
.join('')
// We can't HTML escape the content until AFTER all the matchData positions
// have been processed; otherwise, the positions would shift.
// The only HTML that is OK to keep is <mark> and </mark>.
return highlighted
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/&lt;mark&gt;/g, '<mark>')
.replace(/&lt;\/mark&gt;/g, '</mark>')
}
function mark(text) {
return `<mark>${text}</mark>`
}
// Given a long string, "slice" it in a safe way so as to not chop any
// HTML tags in half.
// The resulting string will be at *least* as long as the `length`
// provided. Possibly longer.
function smartSlice(text, length, needleTag = '<mark>') {
// If the needleTag isn't present at all, we can dare to use a
// very basic crude string slice because the text won't have any
// other HTML tags we might cut in half.
if (!text.includes(needleTag)) {
return text.slice(0, length)
}
// The algorithm is simple, split the text by lines. Loop over them,
// and only include them if we've encountered the first needleTag
// and bail early if we've buffered enough in the array of lines.
const lines = []
let sum = 0
let started = false
for (const line of text.split('\n')) {
if (line.indexOf(needleTag) > -1) started = true
if (started) {
lines.push(line)
sum += line.length
if (sum > length) {
break
}
}
}
return lines.join('\n')
}
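For reference, a minimal sketch of calling this removed module (version, language, and query values borrowed from the QA script further down; the result shape matches the return statement above):
import loadLunrResults from '../../lib/search/lunr-search.js'
const results = await loadLunrResults({
  version: 'dotcom',
  language: 'en',
  query: 'merge conflict',
  limit: 10,
})
// Each entry: { url, breadcrumbs, title, content, topics, score, popularity }
console.log(results.map((result) => result.url))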

package-lock.json (generated, 313 changed lines)
View File

@@ -51,8 +51,6 @@
"lodash": "^4.17.21", "lodash": "^4.17.21",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"lowdb": "5.0.5", "lowdb": "5.0.5",
"lunr": "^2.3.9",
"lunr-languages": "^1.9.0",
"mdast-util-from-markdown": "^1.2.0", "mdast-util-from-markdown": "^1.2.0",
"mdast-util-to-string": "^3.1.0", "mdast-util-to-string": "^3.1.0",
"morgan": "^1.10.0", "morgan": "^1.10.0",
@@ -5651,9 +5649,9 @@
       }
     },
     "node_modules/babel-loader": {
-      "version": "9.1.0",
-      "resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.1.0.tgz",
-      "integrity": "sha512-Antt61KJPinUMwHwIIz9T5zfMgevnfZkEVWYDWlG888fgdvRRGD0JTuf/fFozQnfT+uq64sk1bmdHDy/mOEWnA==",
+      "version": "9.0.1",
+      "resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.0.1.tgz",
+      "integrity": "sha512-szYjslOXFlj/po5KfrVmiuBAcI6GVHFuAgC96Qd6mMPHdwl4lmAJkYtvjQ1RxxPjgdkKjd3LQgXDE4jxEutNuw==",
       "dev": true,
       "dependencies": {
         "find-cache-dir": "^3.3.2",
@@ -7477,7 +7475,7 @@
"node_modules/css-url-parser": { "node_modules/css-url-parser": {
"version": "1.1.3", "version": "1.1.3",
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz", "resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
"integrity": "sha512-KO4HrqK3lAlrnobbBEHib/lFRw7kGOlQTLYhwTwWzDEGilGTYIYOpI22d+6euyZiqfZpV96pii87ZufifbxpqA==", "integrity": "sha1-qkAeXT3RwLkwTAlgKLuZIAH/XJc=",
"optional": true "optional": true
}, },
"node_modules/css-what": { "node_modules/css-what": {
@@ -10642,9 +10640,9 @@
"license": "ISC" "license": "ISC"
}, },
"node_modules/image-size": { "node_modules/image-size": {
"version": "1.0.1", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.1.tgz", "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.2.tgz",
"integrity": "sha512-VAwkvNSNGClRw9mDHhc5Efax8PLlsOGcUTh0T/LIriC8vPA3U5PdqXWqkz406MoYHMKW8Uf9gWr05T/rYB44kQ==", "integrity": "sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg==",
"optional": true, "optional": true,
"dependencies": { "dependencies": {
"queue": "6.0.2" "queue": "6.0.2"
@@ -10653,7 +10651,7 @@
"image-size": "bin/image-size.js" "image-size": "bin/image-size.js"
}, },
"engines": { "engines": {
"node": ">=12.0.0" "node": ">=14.0.0"
} }
}, },
"node_modules/immutable": { "node_modules/immutable": {
@@ -14290,14 +14288,6 @@
"node": ">=10" "node": ">=10"
} }
}, },
"node_modules/lunr": {
"version": "2.3.9",
"license": "MIT"
},
"node_modules/lunr-languages": {
"version": "1.9.0",
"license": "MPL-1.1"
},
"node_modules/make-dir": { "node_modules/make-dir": {
"version": "3.1.0", "version": "3.1.0",
"dev": true, "dev": true,
@@ -15983,9 +15973,9 @@
       }
     },
     "node_modules/p-queue": {
-      "version": "7.3.0",
-      "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.3.0.tgz",
-      "integrity": "sha512-5fP+yVQ0qp0rEfZoDTlP2c3RYBgxvRsw30qO+VtPPc95lyvSG+x6USSh1TuLB4n96IO6I8/oXQGsTgtna4q2nQ==",
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.2.0.tgz",
+      "integrity": "sha512-Kvv7p13M46lTYLQ/PsZdaj/1Vj6u/8oiIJgyQyx4oVkOfHdd7M2EZvXigDvcsSzRwanCzQirV5bJPQFoSQt5MA==",
       "optional": true,
       "dependencies": {
         "eventemitter3": "^4.0.7",
@@ -19387,7 +19377,7 @@
"node_modules/truncate-utf8-bytes": { "node_modules/truncate-utf8-bytes": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz", "resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
"integrity": "sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==", "integrity": "sha1-QFkjkJWS1W94pYGENLC3hInKXys=",
"optional": true, "optional": true,
"dependencies": { "dependencies": {
"utf8-byte-length": "^1.0.1" "utf8-byte-length": "^1.0.1"
@@ -19912,7 +19902,7 @@
"node_modules/utf8-byte-length": { "node_modules/utf8-byte-length": {
"version": "1.0.4", "version": "1.0.4",
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz", "resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
"integrity": "sha512-4+wkEYLBbWxqTahEsWrhxepcoVOJ+1z5PGIjPZxRkytcdSUaNjIjBM7Xn8E+pdSuV7SzvWovBFA54FO0JSoqhA==", "integrity": "sha1-9F8VDExm7uloGGUFq5P8u4rWv2E=",
"optional": true "optional": true
}, },
"node_modules/util-deprecate": { "node_modules/util-deprecate": {
@@ -20321,17 +20311,16 @@
       }
     },
     "node_modules/website-scraper": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.0.0.tgz",
-      "integrity": "sha512-wZP7fSQR86UZSCXfKzd5OlgBb6AdxXN6gVN07Hy2wYxp2+GeqQAIw+sbqXNlPQnpJLwmRZDWp2u6KeuaFOhotw==",
+      "version": "5.3.1",
+      "resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.3.1.tgz",
+      "integrity": "sha512-gogqPXD2gVsxoyd2yRiympw3rA5GuEpD1CaDEJ/J8zzanx7hkbTtneoO1SGs436PpLbWVcUge+6APGLhzsuZPA==",
       "optional": true,
       "dependencies": {
-        "cheerio": "1.0.0-rc.10",
+        "cheerio": "1.0.0-rc.12",
         "css-url-parser": "^1.0.0",
         "debug": "^4.3.1",
         "fs-extra": "^10.0.0",
         "got": "^12.0.0",
-        "lodash": "^4.17.21",
         "normalize-url": "^7.0.2",
         "p-queue": "^7.1.0",
         "sanitize-filename": "^1.6.3",
@@ -20341,121 +20330,6 @@
         "node": ">=14.14"
       }
     },
"node_modules/website-scraper/node_modules/cheerio": {
"version": "1.0.0-rc.10",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.10.tgz",
"integrity": "sha512-g0J0q/O6mW8z5zxQ3A8E8J1hUgp4SMOvEoW/x84OwyHKe/Zccz83PVT4y5Crcr530FV6NgmKI1qvGTKVl9XXVw==",
"optional": true,
"dependencies": {
"cheerio-select": "^1.5.0",
"dom-serializer": "^1.3.2",
"domhandler": "^4.2.0",
"htmlparser2": "^6.1.0",
"parse5": "^6.0.1",
"parse5-htmlparser2-tree-adapter": "^6.0.1",
"tslib": "^2.2.0"
},
"engines": {
"node": ">= 6"
},
"funding": {
"url": "https://github.com/cheeriojs/cheerio?sponsor=1"
}
},
"node_modules/website-scraper/node_modules/cheerio-select": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz",
"integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==",
"optional": true,
"dependencies": {
"css-select": "^4.3.0",
"css-what": "^6.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.3.1",
"domutils": "^2.8.0"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/website-scraper/node_modules/css-select": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz",
"integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==",
"optional": true,
"dependencies": {
"boolbase": "^1.0.0",
"css-what": "^6.0.1",
"domhandler": "^4.3.1",
"domutils": "^2.8.0",
"nth-check": "^2.0.1"
},
"funding": {
"url": "https://github.com/sponsors/fb55"
}
},
"node_modules/website-scraper/node_modules/dom-serializer": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
"integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==",
"optional": true,
"dependencies": {
"domelementtype": "^2.0.1",
"domhandler": "^4.2.0",
"entities": "^2.0.0"
},
"funding": {
"url": "https://github.com/cheeriojs/dom-serializer?sponsor=1"
}
},
"node_modules/website-scraper/node_modules/domhandler": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz",
"integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==",
"optional": true,
"dependencies": {
"domelementtype": "^2.2.0"
},
"engines": {
"node": ">= 4"
},
"funding": {
"url": "https://github.com/fb55/domhandler?sponsor=1"
}
},
"node_modules/website-scraper/node_modules/domutils": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
"optional": true,
"dependencies": {
"dom-serializer": "^1.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.2.0"
},
"funding": {
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
"node_modules/website-scraper/node_modules/htmlparser2": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-6.1.0.tgz",
"integrity": "sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==",
"funding": [
"https://github.com/fb55/htmlparser2?sponsor=1",
{
"type": "github",
"url": "https://github.com/sponsors/fb55"
}
],
"optional": true,
"dependencies": {
"domelementtype": "^2.0.1",
"domhandler": "^4.0.0",
"domutils": "^2.5.2",
"entities": "^2.0.0"
}
},
"node_modules/website-scraper/node_modules/normalize-url": { "node_modules/website-scraper/node_modules/normalize-url": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz", "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
@@ -20468,21 +20342,6 @@
"url": "https://github.com/sponsors/sindresorhus" "url": "https://github.com/sponsors/sindresorhus"
} }
}, },
"node_modules/website-scraper/node_modules/parse5": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
"optional": true
},
"node_modules/website-scraper/node_modules/parse5-htmlparser2-tree-adapter": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz",
"integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==",
"optional": true,
"dependencies": {
"parse5": "^6.0.1"
}
},
"node_modules/which": { "node_modules/which": {
"version": "2.0.2", "version": "2.0.2",
"dev": true, "dev": true,
@@ -24780,9 +24639,9 @@
       }
     },
     "babel-loader": {
-      "version": "9.1.0",
-      "resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.1.0.tgz",
-      "integrity": "sha512-Antt61KJPinUMwHwIIz9T5zfMgevnfZkEVWYDWlG888fgdvRRGD0JTuf/fFozQnfT+uq64sk1bmdHDy/mOEWnA==",
+      "version": "9.0.1",
+      "resolved": "https://registry.npmjs.org/babel-loader/-/babel-loader-9.0.1.tgz",
+      "integrity": "sha512-szYjslOXFlj/po5KfrVmiuBAcI6GVHFuAgC96Qd6mMPHdwl4lmAJkYtvjQ1RxxPjgdkKjd3LQgXDE4jxEutNuw==",
       "dev": true,
       "requires": {
         "find-cache-dir": "^3.3.2",
@@ -26103,7 +25962,7 @@
"css-url-parser": { "css-url-parser": {
"version": "1.1.3", "version": "1.1.3",
"resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz", "resolved": "https://registry.npmjs.org/css-url-parser/-/css-url-parser-1.1.3.tgz",
"integrity": "sha512-KO4HrqK3lAlrnobbBEHib/lFRw7kGOlQTLYhwTwWzDEGilGTYIYOpI22d+6euyZiqfZpV96pii87ZufifbxpqA==", "integrity": "sha1-qkAeXT3RwLkwTAlgKLuZIAH/XJc=",
"optional": true "optional": true
}, },
"css-what": { "css-what": {
@@ -28238,9 +28097,9 @@
"dev": true "dev": true
}, },
"image-size": { "image-size": {
"version": "1.0.1", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.1.tgz", "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.0.2.tgz",
"integrity": "sha512-VAwkvNSNGClRw9mDHhc5Efax8PLlsOGcUTh0T/LIriC8vPA3U5PdqXWqkz406MoYHMKW8Uf9gWr05T/rYB44kQ==", "integrity": "sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg==",
"optional": true, "optional": true,
"requires": { "requires": {
"queue": "6.0.2" "queue": "6.0.2"
@@ -30902,12 +30761,6 @@
"yallist": "^4.0.0" "yallist": "^4.0.0"
} }
}, },
"lunr": {
"version": "2.3.9"
},
"lunr-languages": {
"version": "1.9.0"
},
"make-dir": { "make-dir": {
"version": "3.1.0", "version": "3.1.0",
"dev": true, "dev": true,
@@ -31925,9 +31778,9 @@
       }
     },
     "p-queue": {
-      "version": "7.3.0",
-      "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.3.0.tgz",
-      "integrity": "sha512-5fP+yVQ0qp0rEfZoDTlP2c3RYBgxvRsw30qO+VtPPc95lyvSG+x6USSh1TuLB4n96IO6I8/oXQGsTgtna4q2nQ==",
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-7.2.0.tgz",
+      "integrity": "sha512-Kvv7p13M46lTYLQ/PsZdaj/1Vj6u/8oiIJgyQyx4oVkOfHdd7M2EZvXigDvcsSzRwanCzQirV5bJPQFoSQt5MA==",
       "optional": true,
       "requires": {
         "eventemitter3": "^4.0.7",
@@ -34200,7 +34053,7 @@
"truncate-utf8-bytes": { "truncate-utf8-bytes": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz", "resolved": "https://registry.npmjs.org/truncate-utf8-bytes/-/truncate-utf8-bytes-1.0.2.tgz",
"integrity": "sha512-95Pu1QXQvruGEhv62XCMO3Mm90GscOCClvrIUwCM0PYOXK3kaF3l3sIHxx71ThJfcbM2O5Au6SO3AWCSEfW4mQ==", "integrity": "sha1-QFkjkJWS1W94pYGENLC3hInKXys=",
"optional": true, "optional": true,
"requires": { "requires": {
"utf8-byte-length": "^1.0.1" "utf8-byte-length": "^1.0.1"
@@ -34549,7 +34402,7 @@
"utf8-byte-length": { "utf8-byte-length": {
"version": "1.0.4", "version": "1.0.4",
"resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz", "resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
"integrity": "sha512-4+wkEYLBbWxqTahEsWrhxepcoVOJ+1z5PGIjPZxRkytcdSUaNjIjBM7Xn8E+pdSuV7SzvWovBFA54FO0JSoqhA==", "integrity": "sha1-9F8VDExm7uloGGUFq5P8u4rWv2E=",
"optional": true "optional": true
}, },
"util-deprecate": { "util-deprecate": {
@@ -34835,127 +34688,27 @@
"peer": true "peer": true
}, },
"website-scraper": { "website-scraper": {
"version": "5.0.0", "version": "5.3.1",
"resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.0.0.tgz", "resolved": "https://registry.npmjs.org/website-scraper/-/website-scraper-5.3.1.tgz",
"integrity": "sha512-wZP7fSQR86UZSCXfKzd5OlgBb6AdxXN6gVN07Hy2wYxp2+GeqQAIw+sbqXNlPQnpJLwmRZDWp2u6KeuaFOhotw==", "integrity": "sha512-gogqPXD2gVsxoyd2yRiympw3rA5GuEpD1CaDEJ/J8zzanx7hkbTtneoO1SGs436PpLbWVcUge+6APGLhzsuZPA==",
"optional": true, "optional": true,
"requires": { "requires": {
"cheerio": "1.0.0-rc.10", "cheerio": "1.0.0-rc.12",
"css-url-parser": "^1.0.0", "css-url-parser": "^1.0.0",
"debug": "^4.3.1", "debug": "^4.3.1",
"fs-extra": "^10.0.0", "fs-extra": "^10.0.0",
"got": "^12.0.0", "got": "^12.0.0",
"lodash": "^4.17.21",
"normalize-url": "^7.0.2", "normalize-url": "^7.0.2",
"p-queue": "^7.1.0", "p-queue": "^7.1.0",
"sanitize-filename": "^1.6.3", "sanitize-filename": "^1.6.3",
"srcset": "^5.0.0" "srcset": "^5.0.0"
}, },
"dependencies": { "dependencies": {
"cheerio": {
"version": "1.0.0-rc.10",
"resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.10.tgz",
"integrity": "sha512-g0J0q/O6mW8z5zxQ3A8E8J1hUgp4SMOvEoW/x84OwyHKe/Zccz83PVT4y5Crcr530FV6NgmKI1qvGTKVl9XXVw==",
"optional": true,
"requires": {
"cheerio-select": "^1.5.0",
"dom-serializer": "^1.3.2",
"domhandler": "^4.2.0",
"htmlparser2": "^6.1.0",
"parse5": "^6.0.1",
"parse5-htmlparser2-tree-adapter": "^6.0.1",
"tslib": "^2.2.0"
}
},
"cheerio-select": {
"version": "1.6.0",
"resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-1.6.0.tgz",
"integrity": "sha512-eq0GdBvxVFbqWgmCm7M3XGs1I8oLy/nExUnh6oLqmBditPO9AqQJrkslDpMun/hZ0yyTs8L0m85OHp4ho6Qm9g==",
"optional": true,
"requires": {
"css-select": "^4.3.0",
"css-what": "^6.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.3.1",
"domutils": "^2.8.0"
}
},
"css-select": {
"version": "4.3.0",
"resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz",
"integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==",
"optional": true,
"requires": {
"boolbase": "^1.0.0",
"css-what": "^6.0.1",
"domhandler": "^4.3.1",
"domutils": "^2.8.0",
"nth-check": "^2.0.1"
}
},
"dom-serializer": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
"integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==",
"optional": true,
"requires": {
"domelementtype": "^2.0.1",
"domhandler": "^4.2.0",
"entities": "^2.0.0"
}
},
"domhandler": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz",
"integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==",
"optional": true,
"requires": {
"domelementtype": "^2.2.0"
}
},
"domutils": {
"version": "2.8.0",
"resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz",
"integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==",
"optional": true,
"requires": {
"dom-serializer": "^1.0.1",
"domelementtype": "^2.2.0",
"domhandler": "^4.2.0"
}
},
"htmlparser2": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-6.1.0.tgz",
"integrity": "sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==",
"optional": true,
"requires": {
"domelementtype": "^2.0.1",
"domhandler": "^4.0.0",
"domutils": "^2.5.2",
"entities": "^2.0.0"
}
},
"normalize-url": { "normalize-url": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz", "resolved": "https://registry.npmjs.org/normalize-url/-/normalize-url-7.2.0.tgz",
"integrity": "sha512-uhXOdZry0L6M2UIo9BTt7FdpBDiAGN/7oItedQwPKh8jh31ZlvC8U9Xl/EJ3aijDHaywXTW3QbZ6LuCocur1YA==", "integrity": "sha512-uhXOdZry0L6M2UIo9BTt7FdpBDiAGN/7oItedQwPKh8jh31ZlvC8U9Xl/EJ3aijDHaywXTW3QbZ6LuCocur1YA==",
"optional": true "optional": true
},
"parse5": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-6.0.1.tgz",
"integrity": "sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw==",
"optional": true
},
"parse5-htmlparser2-tree-adapter": {
"version": "6.0.1",
"resolved": "https://registry.npmjs.org/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz",
"integrity": "sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA==",
"optional": true,
"requires": {
"parse5": "^6.0.1"
}
} }
} }
}, },

View File

@@ -53,8 +53,6 @@
"lodash": "^4.17.21", "lodash": "^4.17.21",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"lowdb": "5.0.5", "lowdb": "5.0.5",
"lunr": "^2.3.9",
"lunr-languages": "^1.9.0",
"mdast-util-from-markdown": "^1.2.0", "mdast-util-from-markdown": "^1.2.0",
"mdast-util-to-string": "^3.1.0", "mdast-util-to-string": "^3.1.0",
"morgan": "^1.10.0", "morgan": "^1.10.0",

View File

@@ -748,20 +748,6 @@ Creates Elasticsearch index, populates from records, moves the index alias, dele
 ---
-### [`search/lunr-get-index-names.js`](search/lunr-get-index-names.js)
----
-### [`search/lunr-search-index.js`](search/lunr-search-index.js)
----
 ### [`search/parse-page-sections-into-records.js`](search/parse-page-sections-into-records.js)
@@ -779,23 +765,6 @@ Creates Elasticsearch index, populates from records, moves the index alias, dele
 ### [`search/search-index-records.js`](search/search-index-records.js)
----
-### [`search/search-qa-data.json`](search/search-qa-data.json)
----
-### [`search/search-qa-test.js`](search/search-qa-test.js)
-This script is a quality assurance test for the Lunr search configuration. This test runs example queries and expects a specific page to land in the top 3 results.
-The data source used by this script is a JSON file `script/search/search-qa-data.json`, which is populated from spreadsheet data here: https://docs.google.com/spreadsheets/d/1Dt5JRVcmyAGWKBwGjwmXxi7Ww_vdfYLfZ-EFpu2S2CQ/edit?usp=sharing
 ---

View File

@@ -18,7 +18,6 @@ import dotenv from 'dotenv'
 import { retryOnErrorTest } from '../helpers/retry-on-error-test.js'
 import { languageKeys } from '../../lib/languages.js'
 import { allVersions } from '../../lib/all-versions.js'
-import { decompress } from '../../lib/search/compress.js'
 import statsd from '../../lib/statsd.js'
 // Now you can optionally have set the ELASTICSEARCH_URL in your .env file.
@@ -237,8 +236,8 @@ async function indexVersion(
   verbose = false
 ) {
   // Note, it's a bit "weird" that numbered releases versions are
-  // called the number but that's how the lib/search/indexes
-  // files were.
+  // called the number but that's the convention the previous
+  // search backend used
   const indexVersion = shortNames[version].hasNumberedReleases
     ? shortNames[version].currentRelease
     : shortNames[version].miscBaseName
@@ -414,22 +413,10 @@ function escapeHTML(content) {
 }
 async function loadRecords(indexName, sourceDirectory) {
-  // First try looking for the `$indexName-records.json.br` file.
-  // If that doesn't work, look for the `$indexName-records.json` one.
-  try {
-    const filePath = path.join(sourceDirectory, `${indexName}-records.json.br`)
-    // Do not set to 'utf8' on file reads
-    const payload = await fs.readFile(filePath).then(decompress)
-    return JSON.parse(payload)
-  } catch (error) {
-    if (error.code === 'ENOENT') {
-      const filePath = path.join(sourceDirectory, `${indexName}-records.json`)
-      const payload = await fs.readFile(filePath)
-      return JSON.parse(payload)
-    }
-    throw error
-  }
+  const filePath = path.join(sourceDirectory, `${indexName}-records.json`)
+  const payload = await fs.readFile(filePath)
+  return JSON.parse(payload)
 }
 function getSnowballLanguage(language) {
   // Based on https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-snowball-tokenfilter.html

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env node
import { fileURLToPath } from 'url'
import path from 'path'
import fs from 'fs/promises'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
async function getIndexNames() {
const indexList = await fs.readdir(path.join(__dirname, '../../lib/search/indexes'))
return indexList.sort().map((index) => index.replace('.json.br', ''))
}
export default await getIndexNames()
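A minimal consumption sketch of this removed module, assuming the indexes directory exists; the base names follow the `${namePrefix}-${version}-${language}` pattern used in lib/search/lunr-search.js:
import indexNames from './lunr-get-index-names.js'
// Logs one base name per .json.br file found in lib/search/indexes
for (const name of indexNames) console.log(name)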

View File

@@ -1,114 +0,0 @@
#!/usr/bin/env node
import { fileURLToPath } from 'url'
import path from 'path'
import lunr from 'lunr'
import lunrStemmerSupport from 'lunr-languages/lunr.stemmer.support.js'
import tinyseg from 'lunr-languages/tinyseg.js'
import lunrJa from 'lunr-languages/lunr.ja.js'
import lunrEs from 'lunr-languages/lunr.es.js'
import lunrPt from 'lunr-languages/lunr.pt.js'
import fs from 'fs/promises'
import validateRecords from './validate-records.js'
import { compress } from '../../lib/search/compress.js'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
lunrStemmerSupport(lunr)
tinyseg(lunr)
lunrJa(lunr)
lunrEs(lunr)
lunrPt(lunr)
export default class LunrIndex {
constructor(name, records) {
this.name = name
// Add custom rankings
this.records = records.map((record) => {
return record
})
this.validate()
return this
}
validate() {
return validateRecords(this.name, this.records)
}
build() {
const language = this.name.split('-').pop()
const records = this.records
this.index = lunr(function constructIndex() {
// No arrow here!
if (['ja', 'es', 'pt'].includes(language)) {
this.use(lunr[language])
}
// By default Lunr considers the `-` character to be a word boundary.
// This allows hyphens to be included in the search index.
// If you change this, remember to make it match the indexing separator
// in lib/search/lunr-search.js so the query is tokenized
// identically to the way it was indexed.
this.tokenizer.separator = /[\s]+/
this.ref('objectID')
this.field('url')
this.field('breadcrumbs')
this.field('headings', { boost: 3 })
this.field('title', { boost: 5 })
this.field('content')
this.field('topics')
this.metadataWhitelist = ['position']
for (const record of records) {
this.add(record)
}
})
}
toJSON() {
this.build()
return JSON.stringify(this.index, null, 2)
}
get recordsObject() {
return Object.fromEntries(this.records.map((record) => [record.objectID, record]))
}
async write({
outDirectory = path.posix.join(__dirname, '../../lib/search/indexes'),
compressFiles = true,
}) {
this.build()
// Write the parsed records
await Promise.resolve(this.recordsObject)
.then(JSON.stringify)
.then((str) => (compressFiles ? compress(str) : str))
.then((content) =>
fs.writeFile(
path.join(
outDirectory,
compressFiles ? `${this.name}-records.json.br` : `${this.name}-records.json`
),
content
// Do not set to 'utf8'
)
)
// Write the index
await Promise.resolve(this.index)
.then(JSON.stringify)
.then((str) => (compressFiles ? compress(str) : str))
.then((content) =>
fs.writeFile(
path.join(outDirectory, compressFiles ? `${this.name}.json.br` : `${this.name}.json`),
content
// Do not set to 'utf8'
)
)
}
}
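For context, a minimal sketch of how this class was driven, mirroring the removed call site in the search-sync diff below (the index name and record are hypothetical; real inputs came from build-records.js and may need more fields to pass validate-records.js):
import LunrIndex from './lunr-search-index.js'
// Hypothetical inputs
const indexName = 'github-docs-dotcom-en'
const records = [{ objectID: '/en/some-page', title: 'Some page', content: '...' }]
const index = new LunrIndex(indexName, records) // validates the records on construction
await index.write({ outDirectory: '/tmp/records', compressFiles: true })
// writes <indexName>.json.br and <indexName>-records.json.br into outDirectory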

View File

@@ -1,32 +1,17 @@
 #!/usr/bin/env node
-import { fileURLToPath } from 'url'
 import path from 'path'
 import fs from 'fs/promises'
 import validateRecords from './validate-records.js'
-import { compress } from '../../lib/search/compress.js'
-const __dirname = path.dirname(fileURLToPath(import.meta.url))
-export async function writeIndexRecords(
-  name,
-  records,
-  {
-    outDirectory = path.posix.join(__dirname, '../../lib/search/indexes'),
-    compressFiles = true,
-    prettyPrint = false,
-  }
-) {
+export async function writeIndexRecords(name, records, outDirectory) {
   validateRecords(name, records)
   const recordsObject = Object.fromEntries(records.map((record) => [record.objectID, record]))
-  const content = JSON.stringify(recordsObject, undefined, prettyPrint ? 2 : 0)
-  const filePath = path.join(
-    outDirectory,
-    compressFiles ? `${name}-records.json.br` : `${name}-records.json`
-  )
-  await fs.writeFile(filePath, compressFiles ? await compress(content) : content)
+  const content = JSON.stringify(recordsObject, undefined, 0)
+  const filePath = path.join(outDirectory, `${name}-records.json`)
+  await fs.writeFile(filePath, content)
   return filePath
 }
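With the simplified signature the output directory is now a plain positional argument; a minimal sketch (hypothetical name and records, mirroring the new call site in the search-sync diff below):
import { writeIndexRecords } from './search-index-records.js'
const records = [{ objectID: '/en/some-page', title: 'Some page' }] // hypothetical
const fileWritten = await writeIndexRecords('github-docs-dotcom-en', records, '/tmp/records')
console.log(`wrote records to ${fileWritten}`) // /tmp/records/github-docs-dotcom-en-records.json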

View File

@@ -1,206 +0,0 @@
[
{
"query": "interactions",
"href": "/rest/reference/interactions"
},
{
"query": "repositories",
"href": "/rest/reference/repos"
},
{
"query": "workflow_run",
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
},
{
"query": "workflow_dispatch",
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
},
{
"query": "pull_request",
"href": "/developers/webhooks-and-events/webhooks/webhook-events-and-payloads"
},
{
"query": "workflow_run",
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
},
{
"query": "workflow_dispatch",
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
},
{
"query": "register for an account",
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
},
{
"query": "registering on GitHub",
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
},
{
"query": "signing up for a GitHub account",
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
},
{
"query": "new account",
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
},
{
"query": "create a GitHub account",
"href": "/get-started/signing-up-for-github/signing-up-for-a-new-github-account"
},
{
"query": "apis",
"href": "/graphql"
},
{
"query": "apis",
"href": "/rest"
},
{
"query": "api",
"href": "/graphql"
},
{
"query": "api",
"href": "/rest"
},
{
"query": "create a new branch",
"href": "/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-and-deleting-branches-within-your-repository"
},
{
"query": "fix merge conflict",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "merge conflict",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "branch conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "conflicting files",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "resolve conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-using-the-command-line"
},
{
"query": "fix merge conflict",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "merge conflict",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "branch conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "conflicting files",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "resolve conflicts",
"href": "/pull-requests/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github"
},
{
"query": "actions billable minutes",
"href": "/billing/managing-billing-for-github-actions/about-billing-for-github-actions"
},
{
"query": "actions trigger pull requests",
"href": "/actions/learn-github-actions/events-that-trigger-workflows"
},
{
"query": "about teams",
"href": "/organizations/organizing-members-into-teams/about-teams"
},
{
"query": "about organizations",
"href": "/organizations/collaborating-with-groups-in-organizations/about-organizations"
},
{
"query": "create pages site",
"href": "/pages/getting-started-with-github-pages/creating-a-github-pages-site"
},
{
"query": "create pages site",
"href": "/pages/setting-up-a-github-pages-site-with-jekyll/creating-a-github-pages-site-with-jekyll"
},
{
"query": "make a team",
"href": "/organizations/organizing-members-into-teams/creating-a-team"
},
{
"query": "new team",
"href": "/organizations/organizing-members-into-teams/creating-a-team"
},
{
"query": "team",
"href": "/organizations/organizing-members-into-teams/about-teams"
},
{
"query": "rest create issue",
"href": "/rest/reference/issues"
},
{
"query": "fork",
"href": "/rest/reference/repos"
},
{
"query": "commit email",
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-email-preferences/setting-your-commit-email-address"
},
{
"query": "graphql organization",
"href": "/graphql/reference/objects"
},
{
"query": "device flow",
"href": "/developers/apps/building-oauth-apps/authorizing-oauth-apps"
},
{
"query": "convert user",
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-user-account-settings/converting-a-user-into-an-organization"
},
{
"query": "add email",
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-email-preferences/adding-an-email-address-to-your-github-account"
},
{
"query": "transfer ownership",
"href": "/organizations/managing-organization-settings/transferring-organization-ownership"
},
{
"query": "merge accounts",
"href": "/account-and-profile/setting-up-and-managing-your-github-user-account/managing-user-account-settings/merging-multiple-user-accounts"
},
{
"query": "search syntax",
"href": "/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax"
},
{
"query": "scim okta",
"href": "/organizations/managing-saml-single-sign-on-for-your-organization/configuring-saml-single-sign-on-and-scim-using-okta"
},
{
"query": "keeping your account and data secure",
"href": "/authentication/keeping-your-account-and-data-secure"
},
{
"query": "ssh troubleshoot",
"href": "/authentication/troubleshooting-ssh"
}
]

View File

@@ -1,93 +0,0 @@
#!/usr/bin/env node
// [start-readme]
//
// This script is a quality assurance test for the Lunr search configuration.
// This test runs example queries and expects a specific page to land in the top
// 3 results.
//
// The data source used by this script is a JSON file `script/search/search-qa-data.json`,
// which is populated from spreadsheet data here:
// https://docs.google.com/spreadsheets/d/1Dt5JRVcmyAGWKBwGjwmXxi7Ww_vdfYLfZ-EFpu2S2CQ/edit?usp=sharing
//
// [end-readme]
import loadLunrResults from '../../lib/search/lunr-search.js'
import { readFileSync } from 'fs'
import { join } from 'path'
const queryData = JSON.parse(readFileSync(join(process.cwd(), 'script/search/search-qa-data.json')))
const version = 'dotcom'
const language = 'en'
const limit = 10
const TOP_RANK = 3
main()
async function main() {
const rankResults = []
for (const item in queryData) {
const { query, href } = queryData[item]
try {
const results = await loadLunrResults({
version,
language,
query,
limit,
})
const hrefs = results.map((result) => result.url.replace('/en', ''))
let rank = hrefs.indexOf(href)
// this allows us to sort the results by rank, including total misses
if (rank === -1) {
rank = limit
}
rankResults.push({ query, href, rank })
} catch (err) {
console.error(err)
}
}
logResults(rankResults)
}
async function logResults(results) {
results.sort((a, b) => a.rank - b.rank)
let first = 0
let top = 0
let low = 0
let miss = 0
results.forEach((result) => {
const { query, href, rank } = result
if (rank === limit) {
miss++
console.log(`🔴 query: ${query} - Expected href: ${href}\n`)
return
}
if (rank === 0) {
first++
console.log(`⭐ Query: ${query} - Expected href: ${href}`)
return
}
if (rank < TOP_RANK) {
top++
console.log(`🟢 Query: ${query} - Expected href: ${href}`)
return
}
low++
console.log(`🟡 Query: ${query} - Expected href: ${href}`)
})
const firstPercentage = ((first / queryData.length) * 100).toFixed(1)
const topPercentage = ((top / queryData.length) * 100).toFixed(1)
const lowPercentage = ((low / queryData.length) * 100).toFixed(1)
const missPercentage = ((miss / queryData.length) * 100).toFixed(1)
console.log(`\n⭐ First hit ${firstPercentage}%`)
console.log(`\n🟢 Top ${TOP_RANK} hit ${topPercentage}%`)
console.log(`\n🟡 Top ${limit} hit ${lowPercentage}%`)
console.log(`\n🔴 Miss ${missPercentage}%`)
}

View File

@@ -8,8 +8,6 @@
 // [end-readme]
 import assert from 'assert'
-import path from 'path'
 import { program, Option } from 'commander'
 import { languageKeys } from '../../lib/languages.js'
@@ -27,10 +25,8 @@ const shortNames = Object.fromEntries(
 const allVersionKeys = [...Object.keys(shortNames), ...Object.keys(allVersions)]
-const DEFAULT_OUT_DIRECTORY = path.join('lib', 'search', 'indexes')
 program
-  .description('Creates search records (and Lunr indexes) by scraping')
+  .description('Creates search records by scraping')
   .option('-v, --verbose', 'Verbose outputs')
   .addOption(new Option('-V, --version <VERSION>', 'Specific versions').choices(allVersionKeys))
   .addOption(
@@ -39,23 +35,14 @@ program
   .addOption(
     new Option('--not-language <LANGUAGE>', 'Specific language to omit').choices(languageKeys)
   )
-  .option('-d, --dry-run', 'Does not write to disk')
-  .option(
-    '-o, --out-directory <DIRECTORY>',
-    `Where to dump the created files (default ${DEFAULT_OUT_DIRECTORY})`
-  )
-  .option('--no-compression', `Do not Brotli compress the created .json files (default false)`)
-  // Once we've fully removed all Lunr indexing code, we can remove this option
-  // and change where it's used to be that the default is to not generate
-  // any Lunr indexes.
-  .option('--no-lunr-index', `Do not generate a Lunr index, just the records file (default false)`)
   .option('--no-markers', 'Do not print a marker for each parsed document')
   .option('--filter <MATCH>', 'Filter to only do pages that match this string')
+  .argument('<out-directory>', 'where the indexable files should be written')
   .parse(process.argv)
-main(program.opts())
+main(program.opts(), program.args)
-async function main(opts) {
+async function main(opts, args) {
   let language
   if ('language' in opts) {
     language = opts.language
@@ -117,18 +104,7 @@
     `version must be undefined or one of ${Object.keys(allVersions)}`
   )
-  let dryRun = false
-  if ('dryRun' in opts) {
-    dryRun = opts.dryRun
-  } else {
-    dryRun = Boolean(JSON.parse(process.env.DRY_RUN || 'false'))
-  }
-  const outDirectory = opts.outDirectory || DEFAULT_OUT_DIRECTORY
-  const compressFiles = !!opts.compression
-  const generateLunrIndex = !!opts.lunrIndex
+  const [outDirectory] = args
   const config = {
     noMarkers: !opts.markers,
@@ -136,13 +112,10 @@
   }
   const options = {
-    dryRun,
     language,
     notLanguage,
     version: indexVersion,
     outDirectory,
-    compressFiles,
-    generateLunrIndex,
     config,
   }
   await searchSync(options)

View File

@@ -6,7 +6,6 @@ import buildRecords from './build-records.js'
 import findIndexablePages from './find-indexable-pages.js'
 import { allVersions } from '../../lib/all-versions.js'
 import { namePrefix } from '../../lib/search/config.js'
-import LunrIndex from './lunr-search-index.js'
 import { writeIndexRecords } from './search-index-records.js'
 // Build a search data file for every combination of product version and language
@@ -14,11 +13,8 @@ import { writeIndexRecords } from './search-index-records.js'
 export default async function syncSearchIndexes({
   language,
   version,
-  dryRun,
   notLanguage,
   outDirectory,
-  compressFiles,
-  generateLunrIndex,
   config = {},
 }) {
   const t0 = new Date()
@@ -76,22 +72,10 @@
       redirects,
       config
     )
-    if (generateLunrIndex) {
-      const index = new LunrIndex(indexName, records)
-      if (!dryRun) {
-        await index.write({ outDirectory, compressFiles })
-        console.log('wrote index to file: ', indexName)
-      }
-    } else {
-      const fileWritten = await writeIndexRecords(indexName, records, {
-        outDirectory,
-        compressFiles,
-      })
-      console.log(`wrote records to ${fileWritten}`)
-    }
+    const fileWritten = await writeIndexRecords(indexName, records, outDirectory)
+    console.log(`wrote records to ${fileWritten}`)
   }
   const t1 = new Date()
   const tookSec = (t1.getTime() - t0.getTime()) / 1000