
repo sync

Octomerger Bot committed (via GitHub) on 2021-01-29 05:55:07 +10:00
53 changed files with 237 additions and 8 deletions


@@ -287,7 +287,7 @@ function tmplSearchResult ({ url, breadcrumbs, heading, title, content }) {
)
}
-// Convert em to mark tags in search responses
+// Convert mark tags in search responses
function markify (text) {
const { mark } = tags
return text

lib/search/compress.js (new file, +21 lines)

@@ -0,0 +1,21 @@
const { promisify } = require('util')
const zlib = require('zlib')
const brotliCompress = promisify(zlib.brotliCompress)
const brotliDecompress = promisify(zlib.brotliDecompress)
const options = {
params: {
[zlib.constants.BROTLI_PARAM_MODE]: zlib.constants.BROTLI_MODE_TEXT,
[zlib.constants.BROTLI_PARAM_QUALITY]: 6
}
}
module.exports = {
async compress (data) {
return brotliCompress(data, options)
},
async decompress (data) {
return brotliDecompress(data, options)
}
}
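
For reference, a minimal round-trip sketch of the helper above (the calling code and sample data here are hypothetical, not part of this commit):

const { compress, decompress } = require('./lib/search/compress')

async function roundTrip () {
  const input = JSON.stringify({ example: 'payload' })
  const compressed = await compress(input)      // Buffer of Brotli-compressed bytes
  const restored = await decompress(compressed) // Buffer; call toString() to get the text back
  console.log(restored.toString() === input)    // true
}

roundTrip()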

40 binary files not shown.

lib/search/lunr-get-index-names.js (new file, +6 lines)

@@ -0,0 +1,6 @@
const fs = require('fs').promises
const path = require('path')
module.exports = async function getIndexNames () {
return await fs.readdir(path.join(__dirname, 'indexes'))
}
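
A hedged example of what this helper returns once indexes have been generated; the file names below are illustrative, following the namePrefix-version-language pattern used elsewhere in this commit:

const getIndexNames = require('./lib/search/lunr-get-index-names') // path assumed from the sync script's require below

getIndexNames().then(names => console.log(names))
// e.g. [ 'github-docs-dotcom-en.json.br', 'github-docs-dotcom-en-records.json.br', ... ]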

lib/search/lunr-search-index.js (new file, +93 lines)

@@ -0,0 +1,93 @@
const lunr = require('lunr')
require('lunr-languages/lunr.stemmer.support')(lunr)
require('lunr-languages/tinyseg')(lunr)
require('lunr-languages/lunr.ja')(lunr)
require('lunr-languages/lunr.es')(lunr)
require('lunr-languages/lunr.pt')(lunr)
require('lunr-languages/lunr.de')(lunr)
const fs = require('fs').promises
const path = require('path')
const rank = require('./rank')
const validateRecords = require('./validate-records')
const { compress } = require('./compress')
module.exports = class LunrIndex {
constructor (name, records) {
this.name = name
// Add custom rankings
this.records = records.map(record => {
record.customRanking = rank(record)
return record
})
this.validate()
return this
}
validate () {
return validateRecords(this.name, this.records)
}
build () {
const language = this.name.split('-').pop()
const records = this.records
this.index = lunr(function constructIndex () { // No arrow here!
if (['ja', 'es', 'pt', 'de'].includes(language)) {
this.use(lunr[language])
}
this.ref('objectID')
this.field('url')
this.field('slug')
this.field('breadcrumbs')
this.field('heading')
this.field('title')
this.field('content')
this.field('customRanking')
this.metadataWhitelist = ['position']
for (const record of records) {
this.add(record)
}
})
}
toJSON () {
this.build()
return JSON.stringify(this.index, null, 2)
}
get recordsObject () {
return Object.fromEntries(
this.records.map(record => [record.objectID, record])
)
}
async write () {
this.build()
// Write the parsed records
await Promise.resolve(this.recordsObject)
.then(JSON.stringify)
.then(compress)
.then(content => fs.writeFile(
path.posix.join(__dirname, 'indexes', `${this.name}-records.json.br`),
content
// Do not set to 'utf8'
))
// Write the index
await Promise.resolve(this.index)
.then(JSON.stringify)
.then(compress)
.then(content => fs.writeFile(
path.posix.join(__dirname, 'indexes', `${this.name}.json.br`),
content
// Do not set to 'utf8'
))
}
}
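
A hedged usage sketch for the class above. The record shape is assumed from the fields registered in build() and the objectID ref; whatever validate-records and rank additionally require is not shown in this diff, and the lib/search/indexes directory must already exist:

const LunrIndex = require('./lib/search/lunr-search-index')

const records = [
  {
    objectID: '/en/example',
    url: '/en/example',
    slug: 'example',
    breadcrumbs: 'Example product / Example category',
    heading: 'Example heading',
    title: 'Example page',
    content: 'Some indexable body text'
  }
]

const index = new LunrIndex('github-docs-dotcom-en', records)

// Builds the lunr index, Brotli-compresses the serialized index and the records map,
// and writes github-docs-dotcom-en.json.br and github-docs-dotcom-en-records.json.br
// into lib/search/indexes
index.write().then(() => console.log('index written'))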

lib/search/lunr-search.js (new file, +81 lines)

@@ -0,0 +1,81 @@
const fs = require('fs').promises
const path = require('path')
const lunr = require('lunr')
const { get } = require('lodash')
const { namePrefix } = require('./config')
const { decompress } = require('./compress')
const LUNR_DIR = './indexes'
const lunrIndexes = new Map()
const lunrRecords = new Map()
module.exports = async function loadLunrResults ({ version, language, query, limit }) {
const indexName = `${namePrefix}-${version}-${language}`
if (!lunrIndexes.has(indexName) || !lunrRecords.has(indexName)) {
lunrIndexes.set(indexName, await loadLunrIndex(indexName))
lunrRecords.set(indexName, await loadLunrRecords(indexName))
}
const results = lunrIndexes.get(indexName)
.search(query)
.slice(0, limit)
.map((result) => {
const record = lunrRecords.get(indexName)[result.ref]
return {
url: result.ref,
breadcrumbs: field(result, record, 'breadcrumbs'),
heading: field(result, record, 'heading'),
title: field(result, record, 'title'),
content: field(result, record, 'content')
}
})
return results
}
async function loadLunrIndex (indexName) {
const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}.json.br`)
// Do not set to 'utf8' on file reads
return fs.readFile(filePath)
.then(decompress)
.then(JSON.parse)
.then(lunr.Index.load)
}
async function loadLunrRecords (indexName) {
const filePath = path.posix.join(__dirname, LUNR_DIR, `${indexName}-records.json.br`)
// Do not set to 'utf8' on file reads
return fs.readFile(filePath)
.then(decompress)
.then(JSON.parse)
}
function field (result, record, name) {
const text = record[name]
if (!text) return text
// First, get a list of all the positions of the matching tokens
const positions = Object.values(result.matchData.metadata)
.map(fields => get(fields, [name, 'position']))
.filter(Boolean)
.flat()
.sort((a, b) => a[0] - b[0])
.map(([start, length]) => [start, start + length])
.map(([start, end], i, a) => [i && a[i - 1][1], start, end])
// If this field has no token matches, no highlighting
if (!positions.length) return text
// Highlight the text
return positions
.map(([prev, start, end], i) => [
text.slice(prev, start),
mark(text.slice(start, end)),
i === positions.length - 1 && text.slice(end)
])
.flat()
.filter(Boolean)
.join('')
}
function mark (text) {
return `<mark>${text}</mark>`
}
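
A hedged sketch of querying the Lunr backend directly. The version and query values are illustrative, and the matching compressed index files must already exist in lib/search/indexes:

const loadLunrResults = require('./lib/search/lunr-search')

loadLunrResults({ version: 'dotcom', language: 'en', query: 'actions', limit: 5 })
  .then(results => {
    // Each result is { url, breadcrumbs, heading, title, content },
    // with matching tokens wrapped in <mark> tags by the field() helper above
    console.log(results)
  })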


@@ -14,6 +14,10 @@ const { namePrefix } = require('./config')
const getRemoteIndexNames = require('./algolia-get-remote-index-names')
const AlgoliaIndex = require('./algolia-search-index')
+// Lunr
+const LunrIndex = require('./lunr-search-index')
+const getLunrIndexNames = require('./lunr-get-index-names')
// Build a search data file for every combination of product version and language
// e.g. `github-docs-dotcom-en.json` and `github-docs-2.14-ja.json`
module.exports = async function syncSearchIndexes (opts = {}) {
@@ -67,22 +71,31 @@ module.exports = async function syncSearchIndexes (opts = {}) {
// The page version will be the new version, e.g., free-pro-team@latest, enterprise-server@2.22
const records = await buildRecords(indexName, indexablePages, pageVersion, languageCode)
-const index = new AlgoliaIndex(indexName, records)
+const index = process.env.USE_LUNR
+  ? new LunrIndex(indexName, records)
+  : new AlgoliaIndex(indexName, records)
if (opts.dryRun) {
const cacheFile = path.join(cacheDir, `${indexName}.json`)
fs.writeFileSync(cacheFile, JSON.stringify(index, null, 2))
console.log('wrote dry-run index to disk: ', cacheFile)
} else {
-await index.syncWithRemote()
-console.log('synced index with remote: ', indexName)
+if (process.env.USE_LUNR) {
+  await index.write()
+  console.log('wrote index to file: ', indexName)
+} else {
+  await index.syncWithRemote()
+  console.log('synced index with remote: ', indexName)
+}
}
}
}
// Fetch a list of index names and cache it for tests
// to ensure that an index exists for every language and GHE version
-const remoteIndexNames = await getRemoteIndexNames()
+const remoteIndexNames = process.env.USE_LUNR
+  ? await getLunrIndexNames()
+  : await getRemoteIndexNames()
const cachedIndexNamesFile = path.join(__dirname, './cached-index-names.json')
fs.writeFileSync(
cachedIndexNamesFile,

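In short, the sync script now branches on the USE_LUNR environment variable: when it is set, indexes are built with LunrIndex and written to disk, and the cached index-name list comes from getLunrIndexNames instead of Algolia. A hedged sketch of driving that toggle (the module path is not shown in this diff and is assumed here):

process.env.USE_LUNR = '1'
const syncSearchIndexes = require('./lib/search/sync') // assumed path
// Requires the rest of the site content to be present so buildRecords can run
syncSearchIndexes({ dryRun: true })
  .then(() => console.log('done'))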

@@ -1,6 +1,7 @@
const express = require('express')
const languages = new Set(Object.keys(require('../lib/languages')))
const versions = require('../lib/search/versions')
+const loadLunrResults = require('../lib/search/lunr-search')
const loadAlgoliaResults = require('../lib/search/algolia-search')
const router = express.Router()
@@ -11,8 +12,8 @@ router.get('/', async (req, res) => {
'cache-control': 'private, no-store'
})
-const { query, version, language } = req.query
-const limit = Math.min(parseInt(req.query.limit, 10) || 10, 100)
+const { query, version, language, limit: limit_ } = req.query
+const limit = Math.min(parseInt(limit_, 10) || 10, 100)
if (!versions.has(version) || !languages.has(language)) {
return res.status(400).json([])
}
@@ -21,7 +22,9 @@ router.get('/', async (req, res) => {
}
try {
-const results = await loadAlgoliaResults({ version, language, query, limit })
+const results = process.env.USE_LUNR
+  ? await loadLunrResults({ version, language, query, limit })
+  : await loadAlgoliaResults({ version, language, query, limit })
return res.status(200).json(results)
} catch (err) {
console.error(err)

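A hedged request sketch against the updated route. The mount path, port, and version value are assumptions not shown in this diff:

const http = require('http')

// GET <mount path>?query=...&version=...&language=...&limit=...
http.get('http://localhost:4000/search?query=actions&version=dotcom&language=en&limit=5', res => {
  let body = ''
  res.on('data', chunk => { body += chunk })
  res.on('end', () => console.log(JSON.parse(body))) // array of { url, breadcrumbs, heading, title, content }
})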
package-lock.json (generated, +10 lines)

@@ -17925,6 +17925,16 @@
"yallist": "^4.0.0"
}
},
"lunr": {
"version": "2.3.9",
"resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz",
"integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow=="
},
"lunr-languages": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/lunr-languages/-/lunr-languages-1.4.0.tgz",
"integrity": "sha512-YWfZDExJN/MJEVE/DbM4AuVRLsqeHi+q3wmECMsWjGIOkd5mr9DUNos7fv8f5do9VLRMYXIzFjn+N4+KPI9pQA=="
},
"macos-release": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/macos-release/-/macos-release-2.3.0.tgz",

package.json (+2 lines)

@@ -65,6 +65,8 @@
"linkinator": "^2.13.1",
"liquid": "^5.1.0",
"lodash": "^4.17.19",
"lunr": "^2.3.9",
"lunr-languages": "^1.4.0",
"mdast-util-from-markdown": "^0.8.4",
"mini-css-extract-plugin": "^0.9.0",
"mkdirp": "^1.0.3",