Indexing and searching with Elasticsearch v8 (#33261)
This commit is contained in:
@@ -15,8 +15,9 @@ runs:
|
||||
# is one that will run tests against an Elasticsearch on localhost.
|
||||
uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
|
||||
with:
|
||||
# Make sure this matches production and `sync-search-pr.yml`
|
||||
elasticsearch version: '7.11.1'
|
||||
# Make sure this matches production
|
||||
# It might also need to match what's available on Docker hub
|
||||
elasticsearch version: '8.8.2'
|
||||
host port: 9200
|
||||
container port: 9200
|
||||
host node port: 9300
|
||||
|
||||
80
package-lock.json
generated
80
package-lock.json
generated
@@ -7,7 +7,7 @@
|
||||
"name": "docs.github.com",
|
||||
"license": "(MIT AND CC-BY-4.0)",
|
||||
"dependencies": {
|
||||
"@elastic/elasticsearch": "7.11.0",
|
||||
"@elastic/elasticsearch": "8.10.0",
|
||||
"@github/failbot": "0.8.3",
|
||||
"@primer/behaviors": "^1.3.5",
|
||||
"@primer/css": "^21.0.7",
|
||||
@@ -862,19 +862,38 @@
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@elastic/elasticsearch": {
|
||||
"version": "7.11.0",
|
||||
"license": "Apache-2.0",
|
||||
"version": "8.10.0",
|
||||
"resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.10.0.tgz",
|
||||
"integrity": "sha512-RIEyqz0D18bz/dK+wJltaak+7wKaxDELxuiwOJhuMrvbrBsYDFnEoTdP/TZ0YszHBgnRPGqBDBgH/FHNgHObiQ==",
|
||||
"dependencies": {
|
||||
"debug": "^4.1.1",
|
||||
"hpagent": "^0.1.1",
|
||||
"ms": "^2.1.1",
|
||||
"pump": "^3.0.0",
|
||||
"secure-json-parse": "^2.1.0"
|
||||
"@elastic/transport": "^8.3.4",
|
||||
"tslib": "^2.4.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@elastic/transport": {
|
||||
"version": "8.3.4",
|
||||
"resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.3.4.tgz",
|
||||
"integrity": "sha512-+0o8o74sbzu3BO7oOZiP9ycjzzdOt4QwmMEjFc1zfO7M0Fh7QX1xrpKqZbSd8vBwihXNlSq/EnMPfgD2uFEmFg==",
|
||||
"dependencies": {
|
||||
"debug": "^4.3.4",
|
||||
"hpagent": "^1.0.0",
|
||||
"ms": "^2.1.3",
|
||||
"secure-json-parse": "^2.4.0",
|
||||
"tslib": "^2.4.0",
|
||||
"undici": "^5.22.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@elastic/transport/node_modules/ms": {
|
||||
"version": "2.1.3",
|
||||
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
|
||||
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
|
||||
},
|
||||
"node_modules/@emotion/is-prop-valid": {
|
||||
"version": "1.1.3",
|
||||
"license": "MIT",
|
||||
@@ -1024,7 +1043,6 @@
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.0.0.tgz",
|
||||
"integrity": "sha512-JUFJad5lv7jxj926GPgymrWQxxjPYuJNiNjNMzqT+HiuP6Vl3dk5xzG+8sTX96np0ZAluvaMzPsjhHZ5rNuNQQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
@@ -7268,8 +7286,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/hpagent": {
|
||||
"version": "0.1.2",
|
||||
"license": "MIT"
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
|
||||
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/html-entities": {
|
||||
"version": "2.3.3",
|
||||
@@ -9223,9 +9245,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/lint-staged": {
|
||||
"version": "15.0.1",
|
||||
"resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.0.1.tgz",
|
||||
"integrity": "sha512-2IU5OWmCaxch0X0+IBF4/v7sutpB+F3qoXbro43pYjQTOo5wumckjxoxn47pQBqqBsCWrD5HnI2uG/zJA7isew==",
|
||||
"version": "15.0.2",
|
||||
"resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.0.2.tgz",
|
||||
"integrity": "sha512-vnEy7pFTHyVuDmCAIFKR5QDO8XLVlPFQQyujQ/STOxe40ICWqJ6knS2wSJ/ffX/Lw0rz83luRDh+ET7toN+rOw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"chalk": "5.3.0",
|
||||
@@ -9233,11 +9255,11 @@
|
||||
"debug": "4.3.4",
|
||||
"execa": "8.0.1",
|
||||
"lilconfig": "2.1.0",
|
||||
"listr2": "7.0.1",
|
||||
"listr2": "7.0.2",
|
||||
"micromatch": "4.0.5",
|
||||
"pidtree": "0.6.0",
|
||||
"string-argv": "0.3.2",
|
||||
"yaml": "2.3.2"
|
||||
"yaml": "2.3.3"
|
||||
},
|
||||
"bin": {
|
||||
"lint-staged": "bin/lint-staged.js"
|
||||
@@ -9410,9 +9432,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/listr2": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/listr2/-/listr2-7.0.1.tgz",
|
||||
"integrity": "sha512-nz+7hwgbDp8eWNoDgzdl4hA/xDSLrNRzPu1TLgOYs6l5Y+Ma6zVWWy9Oyt9TQFONwKoSPoka3H50D3vD5EuNwg==",
|
||||
"version": "7.0.2",
|
||||
"resolved": "https://registry.npmjs.org/listr2/-/listr2-7.0.2.tgz",
|
||||
"integrity": "sha512-rJysbR9GKIalhTbVL2tYbF2hVyDnrf7pFUZBwjPaMIdadYHmeT+EVi/Bu3qd7ETQPahTotg2WRCatXwRBW554g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"cli-truncate": "^3.1.0",
|
||||
@@ -13423,8 +13445,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/secure-json-parse": {
|
||||
"version": "2.5.0",
|
||||
"license": "BSD-3-Clause"
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz",
|
||||
"integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="
|
||||
},
|
||||
"node_modules/semver": {
|
||||
"version": "7.5.4",
|
||||
@@ -14690,10 +14713,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/undici": {
|
||||
"version": "5.26.3",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-5.26.3.tgz",
|
||||
"integrity": "sha512-H7n2zmKEWgOllKkIUkLvFmsJQj062lSm3uA4EYApG8gLuiOM0/go9bIoC3HVaSnfg4xunowDE2i9p8drkXuvDw==",
|
||||
"dev": true,
|
||||
"version": "5.25.4",
|
||||
"resolved": "https://registry.npmjs.org/undici/-/undici-5.25.4.tgz",
|
||||
"integrity": "sha512-450yJxT29qKMf3aoudzFpIciqpx6Pji3hEWaXqXmanbXF58LTAGCKxcJjxMXWu3iG+Mudgo3ZUfDB6YDFd/dAw==",
|
||||
"dependencies": {
|
||||
"@fastify/busboy": "^2.0.0"
|
||||
},
|
||||
@@ -15468,9 +15490,9 @@
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/yaml": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.2.tgz",
|
||||
"integrity": "sha512-N/lyzTPaJasoDmfV7YTrYCI0G/3ivm/9wdG0aHuheKowWQwGTsK0Eoiw6utmzAnI6pkJa0DUVygvp3spqqEKXg==",
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.3.tgz",
|
||||
"integrity": "sha512-zw0VAJxgeZ6+++/su5AFoqBbZbrEakwu+X0M5HmcwUiBL7AzcuPKjj5we4xfQLp78LkEMpD0cOnUhmgOVy3KdQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">= 14"
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
"{content,data}/**/*.md": "npm run lint-content -- --precommit --paths"
|
||||
},
|
||||
"dependencies": {
|
||||
"@elastic/elasticsearch": "7.11.0",
|
||||
"@elastic/elasticsearch": "8.10.0",
|
||||
"@github/failbot": "0.8.3",
|
||||
"@primer/behaviors": "^1.3.5",
|
||||
"@primer/css": "^21.0.7",
|
||||
|
||||
@@ -89,30 +89,28 @@ export async function getSearchResults({
|
||||
const highlight = getHighlightConfiguration(query, highlightFields)
|
||||
|
||||
const searchQuery = {
|
||||
index: indexName,
|
||||
highlight,
|
||||
from,
|
||||
size,
|
||||
|
||||
// COMMENTED out because of ES 7.11.
|
||||
// Once we're on ES >7.11 we can add this option in.
|
||||
// // Since we know exactly which fields from the source we're going
|
||||
// // need we can specify that here. It's an inclusion list.
|
||||
// // We can save precious network by not having to transmit fields
|
||||
// // stored in Elasticsearch to here if it's not going to be needed
|
||||
// // anyway.
|
||||
// _source_includes: [
|
||||
// 'title',
|
||||
// 'url',
|
||||
// 'breadcrumbs',
|
||||
// // 'headings'
|
||||
// 'popularity',
|
||||
// ],
|
||||
// Since we know exactly which fields from the source we're going to
|
||||
// need we can specify that here. It's an inclusion list.
|
||||
// We can save precious network by not having to transmit fields
|
||||
// stored in Elasticsearch to here if it's not going to be needed
|
||||
// anyway.
|
||||
_source_includes: ['title', 'url', 'breadcrumbs', 'popularity'],
|
||||
}
|
||||
|
||||
// See note above why this is excluded in ES 7.11
|
||||
// if (includeTopics) {
|
||||
// searchQuery._source_includes.push('topics')
|
||||
// }
|
||||
if (includeTopics) {
|
||||
searchQuery._source_includes.push('topics')
|
||||
}
|
||||
|
||||
for (const key of ['intro', 'headings']) {
|
||||
if (include.includes(key)) {
|
||||
searchQuery._source_includes.push(key)
|
||||
}
|
||||
}
|
||||
|
||||
if (sort === 'best') {
|
||||
// To sort by a function score, you need to wrap the primary
|
||||
@@ -151,11 +149,10 @@ export async function getSearchResults({
|
||||
throw new Error(`Unrecognized sort enum '${sort}'`)
|
||||
}
|
||||
|
||||
const result = await client.search({ index: indexName, body: searchQuery })
|
||||
const result = await client.search(searchQuery)
|
||||
|
||||
// const hitsAll = result.hits // ES >7.11
|
||||
const hitsAll = result.body // ES <=7.11
|
||||
const hits = getHits(hitsAll.hits.hits, {
|
||||
const hitsAll = result.hits
|
||||
const hits = getHits(hitsAll.hits, {
|
||||
indexName,
|
||||
debug,
|
||||
includeTopics,
|
||||
@@ -165,9 +162,9 @@ export async function getSearchResults({
|
||||
const t1 = new Date()
|
||||
|
||||
const meta = {
|
||||
found: hitsAll.hits.total,
|
||||
found: hitsAll.total,
|
||||
took: {
|
||||
query_msec: hitsAll.took,
|
||||
query_msec: result.took,
|
||||
total_msec: t1.getTime() - t0.getTime(),
|
||||
},
|
||||
page,
|
||||
|
||||
@@ -312,23 +312,21 @@ async function indexVersion(
|
||||
|
||||
await client.indices.create({
|
||||
index: thisAlias,
|
||||
body: {
|
||||
mappings: {
|
||||
properties: {
|
||||
url: { type: 'keyword' },
|
||||
title: { type: 'text', analyzer: 'text_analyzer', norms: false },
|
||||
title_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
|
||||
content: { type: 'text', analyzer: 'text_analyzer' },
|
||||
content_explicit: { type: 'text', analyzer: 'text_analyzer_explicit' },
|
||||
headings: { type: 'text', analyzer: 'text_analyzer', norms: false },
|
||||
headings_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
|
||||
breadcrumbs: { type: 'text' },
|
||||
popularity: { type: 'float' },
|
||||
intro: { type: 'text' },
|
||||
},
|
||||
mappings: {
|
||||
properties: {
|
||||
url: { type: 'keyword' },
|
||||
title: { type: 'text', analyzer: 'text_analyzer', norms: false },
|
||||
title_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
|
||||
content: { type: 'text', analyzer: 'text_analyzer' },
|
||||
content_explicit: { type: 'text', analyzer: 'text_analyzer_explicit' },
|
||||
headings: { type: 'text', analyzer: 'text_analyzer', norms: false },
|
||||
headings_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
|
||||
breadcrumbs: { type: 'text' },
|
||||
popularity: { type: 'float' },
|
||||
intro: { type: 'text' },
|
||||
},
|
||||
settings,
|
||||
},
|
||||
settings,
|
||||
})
|
||||
|
||||
// POPULATE
|
||||
@@ -375,7 +373,7 @@ async function indexVersion(
|
||||
// by a bot on a schedule (GitHub Actions).
|
||||
timeout: '5m',
|
||||
}
|
||||
const bulkResponse = await timed({ body: operations, ...bulkOptions })
|
||||
const bulkResponse = await timed({ operations, ...bulkOptions })
|
||||
|
||||
if (bulkResponse.errors) {
|
||||
// Some day, when we're more confident how and why this might happen
|
||||
@@ -387,9 +385,7 @@ async function indexVersion(
|
||||
throw new Error('Bulk errors happened.')
|
||||
}
|
||||
|
||||
const {
|
||||
body: { count },
|
||||
} = await client.count({ index: thisAlias })
|
||||
const { count } = await client.count({ index: thisAlias })
|
||||
console.log(`Documents now in ${chalk.bold(thisAlias)}: ${chalk.bold(count.toLocaleString())}`)
|
||||
|
||||
// To perform an atomic operation that creates the new alias and removes
|
||||
@@ -409,7 +405,7 @@ async function indexVersion(
|
||||
console.log(`Alias ${indexName} -> ${thisAlias}`)
|
||||
|
||||
// const indices = await client.cat.indices({ format: 'json' })
|
||||
const { body: indices } = await retryOnErrorTest(
|
||||
const indices = await retryOnErrorTest(
|
||||
(error) => {
|
||||
return error instanceof errors.ResponseError && error.statusCode === 404
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user