1
0
mirror of synced 2025-12-19 18:10:59 -05:00

Indexing and searching with Elasticsearch v8 (#33261)

This commit is contained in:
Peter Bengtsson
2023-10-23 11:37:58 -04:00
committed by GitHub
parent 9ce2cecbfa
commit 234fc8c307
5 changed files with 92 additions and 76 deletions

View File

@@ -15,8 +15,9 @@ runs:
# is one that will run tests against an Elasticsearch on localhost.
uses: getong/elasticsearch-action@95b501ab0c83dee0aac7c39b7cea3723bef14954
with:
# Make sure this matches production and `sync-search-pr.yml`
elasticsearch version: '7.11.1'
# Make sure this matches production.
# It might also need to match what's available on Docker Hub.
elasticsearch version: '8.8.2'
host port: 9200
container port: 9200
host node port: 9300

80
package-lock.json generated
View File

@@ -7,7 +7,7 @@
"name": "docs.github.com",
"license": "(MIT AND CC-BY-4.0)",
"dependencies": {
"@elastic/elasticsearch": "7.11.0",
"@elastic/elasticsearch": "8.10.0",
"@github/failbot": "0.8.3",
"@primer/behaviors": "^1.3.5",
"@primer/css": "^21.0.7",
@@ -862,19 +862,38 @@
"dev": true
},
"node_modules/@elastic/elasticsearch": {
"version": "7.11.0",
"license": "Apache-2.0",
"version": "8.10.0",
"resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.10.0.tgz",
"integrity": "sha512-RIEyqz0D18bz/dK+wJltaak+7wKaxDELxuiwOJhuMrvbrBsYDFnEoTdP/TZ0YszHBgnRPGqBDBgH/FHNgHObiQ==",
"dependencies": {
"debug": "^4.1.1",
"hpagent": "^0.1.1",
"ms": "^2.1.1",
"pump": "^3.0.0",
"secure-json-parse": "^2.1.0"
"@elastic/transport": "^8.3.4",
"tslib": "^2.4.0"
},
"engines": {
"node": ">=8"
"node": ">=14"
}
},
"node_modules/@elastic/transport": {
"version": "8.3.4",
"resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.3.4.tgz",
"integrity": "sha512-+0o8o74sbzu3BO7oOZiP9ycjzzdOt4QwmMEjFc1zfO7M0Fh7QX1xrpKqZbSd8vBwihXNlSq/EnMPfgD2uFEmFg==",
"dependencies": {
"debug": "^4.3.4",
"hpagent": "^1.0.0",
"ms": "^2.1.3",
"secure-json-parse": "^2.4.0",
"tslib": "^2.4.0",
"undici": "^5.22.1"
},
"engines": {
"node": ">=14"
}
},
"node_modules/@elastic/transport/node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/@emotion/is-prop-valid": {
"version": "1.1.3",
"license": "MIT",
@@ -1024,7 +1043,6 @@
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@fastify/busboy/-/busboy-2.0.0.tgz",
"integrity": "sha512-JUFJad5lv7jxj926GPgymrWQxxjPYuJNiNjNMzqT+HiuP6Vl3dk5xzG+8sTX96np0ZAluvaMzPsjhHZ5rNuNQQ==",
"dev": true,
"engines": {
"node": ">=14"
}
@@ -7268,8 +7286,12 @@
}
},
"node_modules/hpagent": {
"version": "0.1.2",
"license": "MIT"
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz",
"integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==",
"engines": {
"node": ">=14"
}
},
"node_modules/html-entities": {
"version": "2.3.3",
@@ -9223,9 +9245,9 @@
}
},
"node_modules/lint-staged": {
"version": "15.0.1",
"resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.0.1.tgz",
"integrity": "sha512-2IU5OWmCaxch0X0+IBF4/v7sutpB+F3qoXbro43pYjQTOo5wumckjxoxn47pQBqqBsCWrD5HnI2uG/zJA7isew==",
"version": "15.0.2",
"resolved": "https://registry.npmjs.org/lint-staged/-/lint-staged-15.0.2.tgz",
"integrity": "sha512-vnEy7pFTHyVuDmCAIFKR5QDO8XLVlPFQQyujQ/STOxe40ICWqJ6knS2wSJ/ffX/Lw0rz83luRDh+ET7toN+rOw==",
"dev": true,
"dependencies": {
"chalk": "5.3.0",
@@ -9233,11 +9255,11 @@
"debug": "4.3.4",
"execa": "8.0.1",
"lilconfig": "2.1.0",
"listr2": "7.0.1",
"listr2": "7.0.2",
"micromatch": "4.0.5",
"pidtree": "0.6.0",
"string-argv": "0.3.2",
"yaml": "2.3.2"
"yaml": "2.3.3"
},
"bin": {
"lint-staged": "bin/lint-staged.js"
@@ -9410,9 +9432,9 @@
}
},
"node_modules/listr2": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/listr2/-/listr2-7.0.1.tgz",
"integrity": "sha512-nz+7hwgbDp8eWNoDgzdl4hA/xDSLrNRzPu1TLgOYs6l5Y+Ma6zVWWy9Oyt9TQFONwKoSPoka3H50D3vD5EuNwg==",
"version": "7.0.2",
"resolved": "https://registry.npmjs.org/listr2/-/listr2-7.0.2.tgz",
"integrity": "sha512-rJysbR9GKIalhTbVL2tYbF2hVyDnrf7pFUZBwjPaMIdadYHmeT+EVi/Bu3qd7ETQPahTotg2WRCatXwRBW554g==",
"dev": true,
"dependencies": {
"cli-truncate": "^3.1.0",
@@ -13423,8 +13445,9 @@
}
},
"node_modules/secure-json-parse": {
"version": "2.5.0",
"license": "BSD-3-Clause"
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz",
"integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="
},
"node_modules/semver": {
"version": "7.5.4",
@@ -14690,10 +14713,9 @@
"license": "MIT"
},
"node_modules/undici": {
"version": "5.26.3",
"resolved": "https://registry.npmjs.org/undici/-/undici-5.26.3.tgz",
"integrity": "sha512-H7n2zmKEWgOllKkIUkLvFmsJQj062lSm3uA4EYApG8gLuiOM0/go9bIoC3HVaSnfg4xunowDE2i9p8drkXuvDw==",
"dev": true,
"version": "5.25.4",
"resolved": "https://registry.npmjs.org/undici/-/undici-5.25.4.tgz",
"integrity": "sha512-450yJxT29qKMf3aoudzFpIciqpx6Pji3hEWaXqXmanbXF58LTAGCKxcJjxMXWu3iG+Mudgo3ZUfDB6YDFd/dAw==",
"dependencies": {
"@fastify/busboy": "^2.0.0"
},
@@ -15468,9 +15490,9 @@
"license": "ISC"
},
"node_modules/yaml": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.2.tgz",
"integrity": "sha512-N/lyzTPaJasoDmfV7YTrYCI0G/3ivm/9wdG0aHuheKowWQwGTsK0Eoiw6utmzAnI6pkJa0DUVygvp3spqqEKXg==",
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.3.tgz",
"integrity": "sha512-zw0VAJxgeZ6+++/su5AFoqBbZbrEakwu+X0M5HmcwUiBL7AzcuPKjj5we4xfQLp78LkEMpD0cOnUhmgOVy3KdQ==",
"dev": true,
"engines": {
"node": ">= 14"

View File

@@ -53,7 +53,7 @@
"{content,data}/**/*.md": "npm run lint-content -- --precommit --paths"
},
"dependencies": {
"@elastic/elasticsearch": "7.11.0",
"@elastic/elasticsearch": "8.10.0",
"@github/failbot": "0.8.3",
"@primer/behaviors": "^1.3.5",
"@primer/css": "^21.0.7",

View File

@@ -89,30 +89,28 @@ export async function getSearchResults({
const highlight = getHighlightConfiguration(query, highlightFields)
const searchQuery = {
index: indexName,
highlight,
from,
size,
// COMMENTED out because of ES 7.11.
// Once we're on ES >7.11 we can add this option in.
// // Since we know exactly which fields from the source we're going
// // need we can specify that here. It's an inclusion list.
// // We can save precious network by not having to transmit fields
// // stored in Elasticsearch to here if it's not going to be needed
// // anyway.
// _source_includes: [
// 'title',
// 'url',
// 'breadcrumbs',
// // 'headings'
// 'popularity',
// ],
// Since we know exactly which fields from the source we're going to
// need, we can specify that here. It's an inclusion list.
// We can save precious network bandwidth by not having to transmit
// fields stored in Elasticsearch to here if they're not going to be
// needed anyway.
_source_includes: ['title', 'url', 'breadcrumbs', 'popularity'],
}
// See note above why this is excluded in ES 7.11
// if (includeTopics) {
// searchQuery._source_includes.push('topics')
// }
if (includeTopics) {
searchQuery._source_includes.push('topics')
}
for (const key of ['intro', 'headings']) {
if (include.includes(key)) {
searchQuery._source_includes.push(key)
}
}
if (sort === 'best') {
// To sort by a function score, you need to wrap the primary
@@ -151,11 +149,10 @@ export async function getSearchResults({
throw new Error(`Unrecognized sort enum '${sort}'`)
}
const result = await client.search({ index: indexName, body: searchQuery })
const result = await client.search(searchQuery)
// const hitsAll = result.hits // ES >7.11
const hitsAll = result.body // ES <=7.11
const hits = getHits(hitsAll.hits.hits, {
const hitsAll = result.hits
const hits = getHits(hitsAll.hits, {
indexName,
debug,
includeTopics,
@@ -165,9 +162,9 @@ export async function getSearchResults({
const t1 = new Date()
const meta = {
found: hitsAll.hits.total,
found: hitsAll.total,
took: {
query_msec: hitsAll.took,
query_msec: result.took,
total_msec: t1.getTime() - t0.getTime(),
},
page,

View File

@@ -312,23 +312,21 @@ async function indexVersion(
await client.indices.create({
index: thisAlias,
body: {
mappings: {
properties: {
url: { type: 'keyword' },
title: { type: 'text', analyzer: 'text_analyzer', norms: false },
title_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
content: { type: 'text', analyzer: 'text_analyzer' },
content_explicit: { type: 'text', analyzer: 'text_analyzer_explicit' },
headings: { type: 'text', analyzer: 'text_analyzer', norms: false },
headings_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
breadcrumbs: { type: 'text' },
popularity: { type: 'float' },
intro: { type: 'text' },
},
mappings: {
properties: {
url: { type: 'keyword' },
title: { type: 'text', analyzer: 'text_analyzer', norms: false },
title_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
content: { type: 'text', analyzer: 'text_analyzer' },
content_explicit: { type: 'text', analyzer: 'text_analyzer_explicit' },
headings: { type: 'text', analyzer: 'text_analyzer', norms: false },
headings_explicit: { type: 'text', analyzer: 'text_analyzer_explicit', norms: false },
breadcrumbs: { type: 'text' },
popularity: { type: 'float' },
intro: { type: 'text' },
},
settings,
},
settings,
})
// POPULATE
@@ -375,7 +373,7 @@ async function indexVersion(
// by a bot on a schedule (GitHub Actions).
timeout: '5m',
}
const bulkResponse = await timed({ body: operations, ...bulkOptions })
const bulkResponse = await timed({ operations, ...bulkOptions })
if (bulkResponse.errors) {
// Some day, when we're more confident how and why this might happen
@@ -387,9 +385,7 @@ async function indexVersion(
throw new Error('Bulk errors happened.')
}
const {
body: { count },
} = await client.count({ index: thisAlias })
const { count } = await client.count({ index: thisAlias })
console.log(`Documents now in ${chalk.bold(thisAlias)}: ${chalk.bold(count.toLocaleString())}`)
// To perform an atomic operation that creates the new alias and removes
@@ -409,7 +405,7 @@ async function indexVersion(
console.log(`Alias ${indexName} -> ${thisAlias}`)
// const indices = await client.cat.indices({ format: 'json' })
const { body: indices } = await retryOnErrorTest(
const indices = await retryOnErrorTest(
(error) => {
return error instanceof errors.ResponseError && error.statusCode === 404
},