1
0
mirror of synced 2025-12-19 18:10:59 -05:00

Move site search to use an endpoint (#17359)

* Move site search to use an endpoint

* Update browser.js

* Update search.js

* Update lib/search/versions.js

Co-authored-by: James M. Greene <JamesMGreene@github.com>

* Fix URLs

Co-authored-by: James M. Greene <JamesMGreene@github.com>
This commit is contained in:
Kevin Heis
2021-01-20 07:37:42 -08:00
committed by GitHub
parent c5c2347f0a
commit 2fb2e962bc
30 changed files with 436 additions and 416 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,5 @@
.algolia-cache .algolia-cache
.search-cache
.DS_Store .DS_Store
.env .env
/node_modules/ /node_modules/

View File

@@ -90,10 +90,10 @@ Why do we need this? For our daily shipping needs, it's tolerable that search up
### Code files ### Code files
- [javascripts/search.js](javascripts/search.js) - The browser-side code that enables search using Algolia's [InstantSearch.js](https://github.com/algolia/instantsearch.js/) library. - [javascripts/search.js](javascripts/search.js) - The browser-side code that enables search.
- [lib/algolia/client.js](lib/algolia/client.js) - A thin wrapper around the [algoliasearch](https://ghub.io/algoliasearch) Node.js module for interacting with the Algolia API. - [lib/search/algolia-client.js](lib/search/algolia-client.js) - A thin wrapper around the [algoliasearch](https://ghub.io/algoliasearch) Node.js module for interacting with the Algolia API.
- [lib/algolia/search-index.js](lib/algolia/search-index.js) - A class for generating structured search data from repository content and syncing it with the remote Algolia service. This class has built-in validation to ensure that all records are valid before they're uploaded. This class also takes care of removing deprecated records, and compares existing remote records with the latest local records to avoid uploading records that haven't changed. - [lib/search/algolia-search-index.js](lib/search/algolia-search-index.js) - A class for generating structured search data from repository content and syncing it with the remote Algolia service. This class has built-in validation to ensure that all records are valid before they're uploaded. This class also takes care of removing deprecated records, and compares existing remote records with the latest local records to avoid uploading records that haven't changed.
- [script/sync-algolia-search-indices.js](script/sync-algolia-search-indices.js) - The script used by the Actions workflow to update search indices on our Algolia account. This can also be [run in the development environment](#development). - [script/sync-search-indices.js](script/sync-search-indices.js) - The script used by the Actions workflow to update search indices on our Algolia account. This can also be [run in the development environment](#development).
- [tests/algolia-search.js](tests/algolia-search.js) - Tests! - [tests/algolia-search.js](tests/algolia-search.js) - Tests!
## Indices ## Indices
@@ -136,4 +136,4 @@ Each record represents a section of a page. Sections are derived by splitting up
- It's not strictly necessary to set an `objectID` as Algolia will create one automatically, but by creating our own we have a guarantee that subsequent invocations of this upload script will overwrite existing records instead of creating numerous duplicate records with differing IDs. - It's not strictly necessary to set an `objectID` as Algolia will create one automatically, but by creating our own we have a guarantee that subsequent invocations of this upload script will overwrite existing records instead of creating numerous duplicate records with differing IDs.
- Algolia has typo tolerance. Try spelling something wrong and see what you get! - Algolia has typo tolerance. Try spelling something wrong and see what you get!
- Algolia has lots of controls for customizing each index, so we can add weights to certain attributes and create rules like "title is more important than body", etc. But it works pretty well as-is without any configuration. - Algolia has lots of controls for customizing each index, so we can add weights to certain attributes and create rules like "title is more important than body", etc. But it works pretty well as-is without any configuration.
- Algolia has support for "advanced query syntax" for exact matching of quoted expressions and exclusion of words preceded by a `-` sign. This is off by default but we have it enabled in our browser client. This and many other settings can be configured in the Algolia.com web interface. The settings in the web interface can be overridden by the InstantSearch.js client. See [javascripts/search.js](javascripts/search.js). - Algolia has support for "advanced query syntax" for exact matching of quoted expressions and exclusion of words preceded by a `-` sign. This is off by default but we have it enabled in our browser client. This and many other settings can be configured in the Algolia.com web interface. The settings in the web interface can be overridden by the search endpoint. See [middleware/search.js](middleware/search.js).

View File

@@ -5,8 +5,6 @@
- On all other pages, in the header - On all other pages, in the header
--> -->
<form class="mb-0" aria-hidden="true"> <div id="search-input-container" aria-hidden="true">
<div id="search-input-container"> <!-- will add a search input here -->
<!-- Algolia instantsearch.js will add a search input here --> </div>
</div>
</form>

View File

@@ -1,5 +1,6 @@
import murmur from 'imurmurhash' import murmur from 'imurmurhash'
import { getUserEventsId, sendEvent } from './events' import { getUserEventsId, sendEvent } from './events'
// import h from './hyperscript'
const TREATMENT = 'TREATMENT' const TREATMENT = 'TREATMENT'
const CONTROL = 'CONTROL' const CONTROL = 'CONTROL'
@@ -19,23 +20,6 @@ export async function sendSuccess (test) {
}) })
} }
// Namespace required when creating SVG elements through the DOM API
const xmlns = 'http://www.w3.org/2000/svg'

/**
 * Minimal hyperscript helper: creates a DOM element, applies its attributes
 * and appends its children.
 *
 * @param {string} tagName - tag to create; `svg` and `path` are created in the SVG namespace
 * @param {Object} [attributes] - attribute name/value pairs set with `setAttribute`
 * @param {Array} [children] - strings (appended as text nodes) or nodes (appended as-is)
 * @returns {Element} the populated element
 */
export function h (tagName, attributes = {}, children = []) {
  const isSvgTag = ['svg', 'path'].includes(tagName)
  let el
  if (isSvgTag) {
    el = document.createElementNS(xmlns, tagName)
  } else {
    el = document.createElement(tagName)
  }

  for (const [key, value] of Object.entries(attributes)) {
    el.setAttribute(key, value)
  }

  children.forEach(child => {
    if (typeof child === 'string') {
      el.append(document.createTextNode(child))
    } else {
      el.append(child)
    }
  })

  return el
}
export default function () { export default function () {
// const testName = '$test-name$' // const testName = '$test-name$'
// const xbucket = bucket(testName) // const xbucket = bucket(testName)

View File

@@ -1,15 +0,0 @@
// Stand-in for the "Hogan" template engine that instantsearch.js loads.
// The real Hogan relies on `new Function`, which our content security
// policy forbids. Every template we render is our own, so this shim only
// needs to expose the same `compile().render()` shape without erroring.
export default {
  // Returns a "compiled template" whose render output is always empty
  compile: (template) => ({
    render: (data) => ''
  })
}

View File

@@ -0,0 +1,44 @@
// XML namespace passed to document.createElementNS when creating `svg` and `path` elements
const xmlns = 'http://www.w3.org/2000/svg'
// Constructor of an object literal; isPlainObject compares a value's constructor against it
const plainObjectConstructor = {}.constructor
// Reports whether a value is something other than null or undefined
function exists (value) {
  if (value === null) return false
  return typeof value !== 'undefined'
}
/**
 * Reports whether a value was created as an object literal.
 *
 * null and undefined have no `constructor` property to read, so they are
 * rejected up front instead of raising a TypeError.
 *
 * @param {*} value - any value
 * @returns {boolean} true only when the value's constructor is the object-literal constructor
 */
function isPlainObject (value) {
  if (value === null || typeof value === 'undefined') return false
  return value.constructor === plainObjectConstructor
}
// Reports whether a value is a primitive string
function isString (value) {
  const kind = typeof value
  return kind === 'string'
}
function renderChildren (el, children) {
for (const child of children) {
if (isPlainObject(child)) {
Object.entries(child)
.filter(([key, value]) => exists(value))
.forEach(([key, value]) => el.setAttribute(key, value))
} else if (Array.isArray(child)) {
renderChildren(el, child)
} else if (isString(child)) {
el.append(document.createTextNode(child))
} else {
el.append(child)
}
}
}
/**
 * Hyperscript entry point: creates an element and hands every remaining
 * argument to `renderChildren`.
 *
 * @param {string} tagName - tag to create; `svg` and `path` are created in the SVG namespace
 * @param {...*} children - arguments forwarded to `renderChildren`
 * @returns {Element} the created element
 */
export default function h (tagName, ...children) {
  const isSvgTag = ['svg', 'path'].includes(tagName)
  let el
  if (isSvgTag) {
    el = document.createElementNS(xmlns, tagName)
  } else {
    el = document.createElement(tagName)
  }
  renderChildren(el, children)
  return el
}
// Shorthand element factories keyed by tag name, so templates can write
// `div(...)` in place of `h('div', ...)`
export const tags = ['div', 'form', 'a', 'input', 'button', 'ol', 'li', 'em']
  .reduce((factories, tagName) => {
    factories[tagName] = (...args) => h(tagName, ...args)
    return factories
  }, {})

View File

@@ -1,9 +1,6 @@
import { tags } from './hyperscript'
import { sendEvent } from './events' import { sendEvent } from './events'
const instantsearch = require('instantsearch.js').default
const { searchBox, hits, configure, analytics } = require('instantsearch.js/es/widgets')
const algoliasearch = require('algoliasearch')
const searchWithYourKeyboard = require('search-with-your-keyboard') const searchWithYourKeyboard = require('search-with-your-keyboard')
const querystring = require('querystring')
const truncate = require('html-truncate') const truncate = require('html-truncate')
const languages = require('../lib/languages') const languages = require('../lib/languages')
const allVersions = require('../lib/all-versions') const allVersions = require('../lib/all-versions')
@@ -12,261 +9,96 @@ const nonEnterpriseDefaultVersion = require('../lib/non-enterprise-default-versi
const languageCodes = Object.keys(languages) const languageCodes = Object.keys(languages)
const maxContentLength = 300 const maxContentLength = 300
const hasStandaloneSearch = () => document.getElementById('landing') || document.querySelector('body.error-404') !== null let $searchInputContainer
let $searchResultsContainer
let $searchOverlay
let $searchInput
const resultTemplate = (item) => { let placeholder = 'Search topics, products...'
// Attach an `algolia-query` param to each result link so analytics let version
// can track the search query that led the user to this result let language
const input = document.querySelector('#search-input-container input')
if (input) {
const url = new URL(item.objectID, window.location.origin)
const queryParams = new URLSearchParams(url.search.slice(1))
queryParams.append('algolia-query', input.value)
url.search = queryParams.toString()
item.modifiedURL = url.toString()
}
// Display page title and heading (if present exists) export default function search () {
const title = item._highlightResult.heading $searchInputContainer = document.getElementById('search-input-container')
? [item._highlightResult.title.value, item._highlightResult.heading.value].join(': ') $searchResultsContainer = document.getElementById('search-results-container')
: item._highlightResult.title.value
// Remove redundant title from the end of breadcrumbs if (!$searchInputContainer || !$searchResultsContainer) return
if (item.breadcrumbs && item.breadcrumbs.endsWith(item.title)) {
item.modifiedBreadcrumbs = item.breadcrumbs.replace(' / ' + item.title, '')
} else {
item.modifiedBreadcrumbs = item.breadcrumbs
}
// Truncate and ellipsize the content string without breaking any HTML $searchOverlay = document.querySelector('.search-overlay-desktop')
// within it, such as the <mark> tags added by Algolia for emphasis.
item.modifiedContent = truncate(item._highlightResult.content.value, maxContentLength)
// Construct the template to return // There's an index for every version/language combination
const html = ` version = deriveVersionFromPath()
<div class="search-result border-top border-gray-light py-3 px-2"> language = deriveLanguageCodeFromPath()
<a href="#" class="no-underline">
<div class="search-result-breadcrumbs d-block text-gray-dark opacity-60 text-small pb-1">${item.modifiedBreadcrumbs}</div>
<div class="search-result-title d-block h4-mktg text-gray-dark">${title}</div>
<div class="search-result-content d-block text-gray">${item.modifiedContent}</div>
</a>
</div>
`
// Sanitize the link's href attribute using the DOM API to prevent XSS
const fragment = document.createRange().createContextualFragment(html)
fragment.querySelector('a').setAttribute('href', item.modifiedURL)
const div = document.createElement('div')
div.appendChild(fragment.cloneNode(true))
return div.innerHTML
}
export default function () {
if (!document.querySelector('#search-results-container')) return
window.initialPageLoad = true
const opts = {
// https://www.algolia.com/apps/ZI5KPY1HBE/dashboard
// This API key is public. There's also a private API key for writing to the Algolia API
searchClient: algoliasearch('ZI5KPY1HBE', '685df617246c3a10abba589b4599288f'),
// There's an index for every version/language combination
indexName: `github-docs-${deriveVersionFromPath()}-${deriveLanguageCodeFromPath()}`,
// allows "phrase queries" and "prohibit operator"
// https://www.algolia.com/doc/api-reference/api-parameters/advancedSyntax/
advancedSyntax: true,
// sync query params to search input
routing: true,
searchFunction: helper => {
// console.log('searchFunction', helper.state)
const query = helper.state.query
const queryPresent = query && query.length > 0
const results = document.querySelector('.ais-Hits')
// avoid conducting an empty search on page load;
if (window.initialPageLoad && !queryPresent) return
// after page load, search should be executed (even if the query is empty)
// so as not to upset the default instantsearch.js behaviors like clearing
// the input when [x] is clicked.
helper.search()
// If on homepage, toggle results container if query is present
if (hasStandaloneSearch()) {
const container = document.getElementById('search-results-container')
// Primer classNames for showing and hiding the results container
const activeClass = container.getAttribute('data-active-class')
const inactiveClass = container.getAttribute('data-inactive-class')
if (!activeClass) {
console.error('container is missing required `data-active-class` attribute', container)
return
}
if (!inactiveClass) {
console.error('container is missing required `data-inactive-class` attribute', container)
return
}
// hide the container when no query is present
container.classList.toggle(activeClass, queryPresent)
container.classList.toggle(inactiveClass, !queryPresent)
}
// Hack to work around a mysterious bug where the input is not cleared
// when the [x] is clicked. Note: this bug only occurs on pages
// loaded with a ?query=foo param already present
if (!queryPresent) {
setTimeout(() => {
document.querySelector('#search-input-container input').value = ''
}, 50)
results.style.display = 'none'
}
if (queryPresent && results) results.style.display = 'block'
window.initialPageLoad = false
toggleSearchDisplay()
}
}
const search = instantsearch(opts)
// Find search placeholder text in a <meta> tag, falling back to a default // Find search placeholder text in a <meta> tag, falling back to a default
const placeholderMeta = document.querySelector('meta[name="site.data.ui.search.placeholder"]') const $placeholderMeta = document.querySelector('meta[name="site.data.ui.search.placeholder"]')
const placeholder = placeholderMeta ? placeholderMeta.content : 'Search topics, products...' if ($placeholderMeta) {
placeholder = $placeholderMeta.content
}
search.addWidgets( $searchInputContainer.append(tmplSearchInput())
[ $searchInput = $searchInputContainer.querySelector('input')
hits({
container: '#search-results-container',
templates: {
empty: 'No results',
item: resultTemplate
},
// useful for debugging template context, if needed
transformItems: items => {
// console.log(`transformItems`, items)
return items
}
}),
configure({
analyticsTags: [
'site:docs.github.com',
`env:${process.env.NODE_ENV}`
]
}),
searchBox({
container: '#search-input-container',
placeholder,
// only autofocus on the homepage, and only if no #hash is present in the URL
autofocus: (hasStandaloneSearch()) && !window.location.hash.length,
showReset: false,
showSubmit: false
}),
analytics({
pushFunction (params, state, results) {
sendEvent({
type: 'search',
search_query: results.query
// search_context
})
}
})
]
)
// enable for debugging
search.on('render', (...args) => {
// console.log(`algolia render`, args)
})
search.on('error', (...args) => {
console.error('algolia error', args)
})
search.start()
searchWithYourKeyboard('#search-input-container input', '.ais-Hits-item') searchWithYourKeyboard('#search-input-container input', '.ais-Hits-item')
toggleSearchDisplay() toggleSearchDisplay()
// delay removal of the query param so analytics client code has a chance to track it $searchInput.addEventListener('keyup', debounce(onSearch))
setTimeout(() => { removeAlgoliaQueryTrackingParam() }, 500)
} }
// When a user performs an in-site search an `algolia-query` param is // The home page and 404 pages have a standalone search
// added to the URL so analytics can track the queries and the pages function hasStandaloneSearch () {
// they lead to. This function strips the query from the URL after page load, return document.getElementById('landing') ||
// so the bare article URL can be copied/bookmarked/shared, sans tracking param document.querySelector('body.error-404') !== null
function removeAlgoliaQueryTrackingParam () {
if (
history &&
history.replaceState &&
location &&
location.search &&
location.search.includes('algolia-query=')
) {
// parse the query string, remove the `algolia-query`, and put it all back together
let q = querystring.parse(location.search.replace(/^\?/, ''))
delete q['algolia-query']
q = Object.keys(q).length ? '?' + querystring.stringify(q) : ''
// update the URL in the address bar without modifying the history
history.replaceState(null, '', `${location.pathname}${q}${location.hash}`)
}
} }
function toggleSearchDisplay (isReset) { function toggleSearchDisplay () {
const input = document.querySelector('#search-input-container input')
const overlay = document.querySelector('.search-overlay-desktop')
// If not on homepage...
if (!hasStandaloneSearch()) {
// Open modal if input is clicked
input.addEventListener('focus', () => {
openSearch()
})
// Close modal if overlay is clicked
if (overlay) {
overlay.addEventListener('click', () => {
closeSearch()
})
}
// Open modal if page loads with query in the params/input
if (input.value) {
openSearch()
}
}
// Clear/close search, if ESC is clicked // Clear/close search, if ESC is clicked
document.addEventListener('keyup', (e) => { document.addEventListener('keyup', (e) => {
if (e.key === 'Escape') { if (e.key === 'Escape') {
closeSearch() closeSearch()
} }
}) })
// If not on homepage...
if (hasStandaloneSearch()) return
const $input = $searchInput
// Open modal if input is clicked
$input.addEventListener('focus', () => {
openSearch()
})
// Close modal if overlay is clicked
if ($searchOverlay) {
$searchOverlay.addEventListener('click', () => {
closeSearch()
})
}
// Open modal if page loads with query in the params/input
if ($input.value) {
openSearch()
}
} }
function openSearch () { function openSearch () {
document.querySelector('#search-input-container input').classList.add('js-open') $searchInput.classList.add('js-open')
document.querySelector('#search-results-container').classList.add('js-open') $searchResultsContainer.classList.add('js-open')
document.querySelector('.search-overlay-desktop').classList.add('js-open') $searchOverlay.classList.add('js-open')
} }
function closeSearch () { function closeSearch () {
// Close modal if not on homepage // Close modal if not on homepage
if (!hasStandaloneSearch()) { if (!hasStandaloneSearch()) {
document.querySelector('#search-input-container input').classList.remove('js-open') $searchInput.classList.remove('js-open')
document.querySelector('#search-results-container').classList.remove('js-open') $searchResultsContainer.classList.remove('js-open')
document.querySelector('.search-overlay-desktop').classList.remove('js-open') $searchOverlay.classList.remove('js-open')
} }
document.querySelector('.ais-Hits').style.display = 'none' const $hits = $searchResultsContainer.querySelector('.ais-Hits')
document.querySelector('#search-input-container input').value = '' if ($hits) $hits.style.display = 'none'
window.history.replaceState({}, 'clear search query', window.location.pathname) $searchInput.value = ''
} }
function deriveLanguageCodeFromPath () { function deriveLanguageCodeFromPath () {
@@ -277,8 +109,8 @@ function deriveLanguageCodeFromPath () {
function deriveVersionFromPath () { function deriveVersionFromPath () {
// fall back to the non-enterprise default version (FPT currently) on the homepage, 404 page, etc. // fall back to the non-enterprise default version (FPT currently) on the homepage, 404 page, etc.
const version = location.pathname.split('/')[2] || nonEnterpriseDefaultVersion const versionStr = location.pathname.split('/')[2] || nonEnterpriseDefaultVersion
const versionObject = allVersions[version] || allVersions[nonEnterpriseDefaultVersion] const versionObject = allVersions[versionStr] || allVersions[nonEnterpriseDefaultVersion]
// if GHES, returns the release number like 2.21, 2.22, etc. // if GHES, returns the release number like 2.21, 2.22, etc.
// if FPT, returns 'dotcom' // if FPT, returns 'dotcom'
@@ -287,3 +119,148 @@ function deriveVersionFromPath () {
? versionObject.currentRelease ? versionObject.currentRelease
: versionObject.miscBaseName : versionObject.miscBaseName
} }
/**
 * Wraps `fn` so that a burst of calls results in a single invocation,
 * made `delay` milliseconds after the most recent call and receiving that
 * call's arguments.
 *
 * @param {Function} fn - function to invoke once the calls settle
 * @param {number} [delay=300] - quiet period in milliseconds
 * @returns {Function} the debounced wrapper
 */
function debounce (fn, delay = 300) {
  let pendingTimer
  const schedule = (args) => setTimeout(() => fn.call(null, ...args), delay)
  return (...args) => {
    // Each new call cancels the invocation scheduled by the previous one
    clearTimeout(pendingTimer)
    pendingTimer = schedule(args)
  }
}
/**
 * Input handler for the search box: requests results for the current query
 * from the site's `/search` endpoint and renders them into the results
 * container.
 *
 * A request that fails outright is treated like a non-OK response (an empty
 * result list) instead of surfacing as an unhandled promise rejection.
 * Responses that arrive after the input has moved on to a different query
 * are dropped, so slow responses cannot overwrite newer results.
 *
 * @param {Event} evt - event whose `target.value` holds the query text
 * @returns {Promise<void>}
 */
async function onSearch (evt) {
  const query = evt.target.value

  const url = new URL(location.origin)
  url.pathname = '/search'
  url.search = new URLSearchParams({ query, version, language }).toString()

  let results = []
  try {
    const response = await fetch(url, {
      method: 'GET',
      // NOTE(review): a GET has no body, so `Accept` is probably the header
      // that was intended here; left unchanged pending a check of the endpoint
      headers: {
        'Content-Type': 'application/json'
      }
    })
    if (response.ok) results = await response.json()
  } catch (err) {
    console.error('search request failed', err)
  }

  // The input changed while the request was in flight: this response is stale
  if (evt.target.value !== query) return

  $searchResultsContainer.querySelectorAll('*').forEach(el => el.remove())
  $searchResultsContainer.append(
    tmplSearchResults(results)
  )

  toggleStandaloneSearch()

  // Analytics tracking
  sendEvent({
    type: 'search',
    search_query: query
    // search_context
  })
}
/**
 * On pages with a standalone search, shows the results container while the
 * input holds a query and hides it otherwise. Does nothing on other pages.
 *
 * The class names to toggle are read from the container's
 * `data-active-class` and `data-inactive-class` attributes; if either is
 * missing an error is logged and nothing is toggled.
 */
function toggleStandaloneSearch () {
  const isStandalone = hasStandaloneSearch()
  if (!isStandalone) return

  const { value: query } = $searchInput
  const queryPresent = query && query.length > 0
  const $results = document.querySelector('.ais-Hits')

  // Primer classNames for showing and hiding the results container
  const [activeClass, inactiveClass] = ['data-active-class', 'data-inactive-class']
    .map(attributeName => $searchResultsContainer.getAttribute(attributeName))

  const requiredClasses = [
    [activeClass, 'container is missing required `data-active-class` attribute'],
    [inactiveClass, 'container is missing required `data-inactive-class` attribute']
  ]
  for (const [className, message] of requiredClasses) {
    if (!className) {
      console.error(message, $searchResultsContainer)
      return
    }
  }

  // hide the container when no query is present
  const { classList } = $searchResultsContainer
  classList.toggle(activeClass, queryPresent)
  classList.toggle(inactiveClass, !queryPresent)

  if ($results && queryPresent) {
    $results.style.display = 'block'
  }
}
/** * Template functions ***/

/**
 * Builds the search box: a wrapper div containing a search form with the
 * text input and a hidden submit button.
 *
 * Results are fetched while typing, so the form itself must never submit:
 * without a submit handler, pressing Enter in the input would submit the
 * form and reload the page.
 *
 * @returns {Element} the `.ais-SearchBox` wrapper element
 */
function tmplSearchInput () {
  // only autofocus on the homepage, and only if no #hash is present in the URL
  const autofocus = (hasStandaloneSearch() && !location.hash.length) || null
  const { div, form, input, button } = tags
  const $form = form(
    { role: 'search', class: 'ais-SearchBox-form', novalidate: true },
    input({
      class: 'ais-SearchBox-input',
      type: 'search',
      placeholder,
      autofocus,
      autocomplete: 'off',
      autocorrect: 'off',
      autocapitalize: 'off',
      spellcheck: 'false',
      maxlength: '512'
    }),
    button({
      class: 'ais-SearchBox-submit',
      type: 'submit',
      title: 'Submit the search query.',
      hidden: true
    })
  )
  // Keep Enter from submitting the form and refreshing the page
  $form.addEventListener('submit', evt => evt.preventDefault())
  return div(
    { class: 'ais-SearchBox' },
    $form
  )
}
/**
 * Builds the results list: an `.ais-Hits` wrapper holding an ordered list
 * with one `.ais-Hits-item` entry per search result.
 *
 * @param {Array<Object>} items - search results, each passed to `tmplSearchResult`
 * @returns {Element} the `.ais-Hits` wrapper element
 */
function tmplSearchResults (items) {
  const { div, ol, li } = tags
  const hitItems = items.map(item => {
    const result = tmplSearchResult(item)
    return li({ class: 'ais-Hits-item' }, result)
  })
  const hitList = ol({ class: 'ais-Hits-list' }, hitItems)
  return div({ class: 'ais-Hits', style: 'display:block' }, hitList)
}
/**
 * Builds the markup for a single search result: a link wrapping the
 * breadcrumbs, the title (with heading, when there is one) and the
 * truncated content.
 *
 * @param {Object} result
 * @param {string} result.url - link target
 * @param {string} [result.breadcrumbs] - ' / '-separated trail; may be missing
 * @param {string} [result.heading] - section heading, appended to the title when present
 * @param {string} result.title - page title
 * @param {string} result.content - excerpt; may contain <em> highlight tags
 * @returns {Element} the `.search-result` element
 */
function tmplSearchResult ({ url, breadcrumbs, heading, title, content }) {
  const { div, a } = tags

  // Remove redundant title from the end of breadcrumbs. Only a trailing
  // match is stripped; an earlier segment that merely starts with the
  // title is left intact.
  const redundantSuffix = ` / ${title}`
  const trail = breadcrumbs || ''
  const shortTrail = trail.endsWith(redundantSuffix)
    ? trail.slice(0, trail.length - redundantSuffix.length)
    : trail

  return div(
    { class: 'search-result border-top border-gray-light py-3 px-2' },
    a(
      { href: url, class: 'no-underline' },
      div(
        { class: 'search-result-breadcrumbs d-block text-gray-dark opacity-60 text-small pb-1' },
        emify(shortTrail)
      ),
      div(
        { class: 'search-result-title d-block h4-mktg text-gray-dark' },
        // Display page title and heading (if present)
        emify(heading ? `${title}: ${heading}` : title)
      ),
      div(
        { class: 'search-result-content d-block text-gray' },
        // Truncate without breaking inner HTML tags
        emify(truncate(content, maxContentLength))
      )
    )
  )
}
/**
 * Allow em tags in search responses.
 *
 * Splits the text on `<em>` / `</em>` markers. The pieces at odd positions
 * sat between an opening and a closing marker, so those are wrapped in an
 * `em` element; the others stay plain strings.
 *
 * @param {string} text - text that may contain <em>…</em> markers
 * @returns {Array} strings and `em` elements, in document order
 */
function emify (text) {
  const { em } = tags
  const pieces = text.split(/<\/?em>/g)
  return pieces.map((piece, position) => {
    if (position % 2) return em(piece)
    return piece
  })
}

View File

@@ -3,4 +3,6 @@ require('dotenv').config()
const algoliasearch = require('algoliasearch') const algoliasearch = require('algoliasearch')
const { ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY } = process.env const { ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY } = process.env
module.exports = algoliasearch(ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY) module.exports = function () {
return algoliasearch(ALGOLIA_APPLICATION_ID, ALGOLIA_API_KEY)
}

View File

@@ -1,13 +1,14 @@
const algoliaClient = require('./client') const { namePrefix } = require('./config')
const AlgoliaIndex = require('./search-index') const getAlgoliaClient = require('./algolia-client')
module.exports = async function getRemoteIndexNames () { module.exports = async function getRemoteIndexNames () {
const algoliaClient = getAlgoliaClient()
const indices = await algoliaClient.listIndexes() const indices = await algoliaClient.listIndexes()
// ignore other indices that may be present in the Algolia account like `helphub-`, etc // ignore other indices that may be present in the Algolia account like `helphub-`, etc
const indexNames = indices.items const indexNames = indices.items
.map(field => field.name) .map(field => field.name)
.filter(name => name.startsWith(AlgoliaIndex.namePrefix)) .filter(name => name.startsWith(namePrefix))
return indexNames return indexNames
} }

View File

@@ -1,17 +1,11 @@
const assert = require('assert') const { chain, chunk, difference } = require('lodash')
const { chain, chunk, difference, isArray, isString, inRange } = require('lodash')
const eventToPromise = require('event-to-promise') const eventToPromise = require('event-to-promise')
const objectHash = require('object-hash') const objectHash = require('object-hash')
const countArrayValues = require('count-array-values')
const isURL = require('is-url')
const rank = require('./rank') const rank = require('./rank')
const validateRecords = require('./validate-records')
const getAlgoliaClient = require('./algolia-client')
class AlgoliaIndex { class AlgoliaIndex {
// records must be truncated to avoid going over Algolia's 10K limit
static get maxRecordLength () { return 8000 }
static get maxContentLength () { return 5000 }
static get namePrefix () { return 'github-docs' }
constructor (name, records) { constructor (name, records) {
this.name = name this.name = name
this.records = records this.records = records
@@ -24,52 +18,14 @@ class AlgoliaIndex {
} }
validate () { validate () {
assert(isString(this.name) && this.name.length, '`name` is required') return validateRecords(this.name, this.records)
assert(isArray(this.records) && this.records.length, '`records` must be a non-empty array')
// each ID is unique
const objectIDs = this.records.map(record => record.objectID)
const dupes = countArrayValues(objectIDs)
.filter(({ value, count }) => count > 1)
.map(({ value }) => value)
assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`)
this.records.forEach(record => {
assert(
isString(record.objectID) && record.objectID.length,
`objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}`
)
assert(
isString(record.title) && record.title.length,
`title must be a string. received: ${record.title}, ${JSON.stringify(record)}`
)
assert(
isURL(record.url),
`url must be a fully qualified URL. received: ${record.url}, ${JSON.stringify(record)}`
)
assert(
inRange(record.customRanking, 0, 4),
`customRanking must be an in-range number. received: ${record.customRanking}, (record: ${record.url})`
)
const recordLength = JSON.stringify(record).length
assert(
recordLength <= AlgoliaIndex.maxRecordLength,
`record ${record.url} is too long! ${recordLength} (max: ${AlgoliaIndex.maxRecordLength})`
)
})
return true
} }
// This method consumes Algolia's `browseAll` event emitter, // This method consumes Algolia's `browseAll` event emitter,
// aggregating results into an array of all the records // aggregating results into an array of all the records
// https://www.algolia.com/doc/api-reference/api-methods/browse/ // https://www.algolia.com/doc/api-reference/api-methods/browse/
async fetchExistingRecords () { async fetchExistingRecords () {
const client = require('./client') const client = getAlgoliaClient()
// return an empty array if the index does not exist yet // return an empty array if the index does not exist yet
const { items: indices } = await client.listIndexes() const { items: indices } = await client.listIndexes()
@@ -97,7 +53,7 @@ class AlgoliaIndex {
} }
async syncWithRemote () { async syncWithRemote () {
const client = require('./client') const client = getAlgoliaClient()
console.log('\n\nsyncing %s with remote', this.name) console.log('\n\nsyncing %s with remote', this.name)
this.validate() this.validate()

6
lib/search/config.js Normal file
View File

@@ -0,0 +1,6 @@
// Shared settings for building, validating and naming the search indices
module.exports = {
// records must be truncated to avoid going over Algolia's 10K limit
// maxRecordLength: upper bound on a record's JSON-serialized length
maxRecordLength: 8000,
// maxContentLength: a record's `content` string is sliced to this many characters
maxContentLength: 5000,
// prefix of every index name, e.g. `github-docs-dotcom-en`
namePrefix: 'github-docs'
}

View File

@@ -4,11 +4,11 @@
const { chain } = require('lodash') const { chain } = require('lodash')
const urlPrefix = 'https://docs.github.com' const urlPrefix = 'https://docs.github.com'
const AlgoliaIndex = require('./search-index')
const ignoredHeadingSlugs = [ const ignoredHeadingSlugs = [
'in-this-article', 'in-this-article',
'further-reading' 'further-reading'
] ]
const { maxContentLength } = require('./config')
module.exports = function parsePageSectionsIntoRecords (href, $) { module.exports = function parsePageSectionsIntoRecords (href, $) {
const title = $('h1').text().trim() const title = $('h1').text().trim()
@@ -46,7 +46,7 @@ module.exports = function parsePageSectionsIntoRecords (href, $) {
.get() .get()
.join(' ') .join(' ')
.trim() .trim()
.slice(0, AlgoliaIndex.maxContentLength) .slice(0, maxContentLength)
return { return {
objectID, objectID,
url, url,
@@ -67,7 +67,7 @@ module.exports = function parsePageSectionsIntoRecords (href, $) {
.get() .get()
.join(' ') .join(' ')
.trim() .trim()
.slice(0, AlgoliaIndex.maxContentLength) .slice(0, maxContentLength)
records = [{ records = [{
objectID, objectID,

View File

@@ -6,14 +6,17 @@ const chalk = require('chalk')
const languages = require('../languages') const languages = require('../languages')
const buildRecords = require('./build-records') const buildRecords = require('./build-records')
const findIndexablePages = require('./find-indexable-pages') const findIndexablePages = require('./find-indexable-pages')
const getRemoteIndexNames = require('./get-remote-index-names') const cacheDir = path.join(process.cwd(), './.search-cache')
const Index = require('./search-index')
const cacheDir = path.join(process.cwd(), './.algolia-cache')
const allVersions = require('../all-versions') const allVersions = require('../all-versions')
const { namePrefix } = require('./config')
// Algolia
const getRemoteIndexNames = require('./algolia-get-remote-index-names')
const AlgoliaIndex = require('./algolia-search-index')
// Build a search data file for every combination of product version and language // Build a search data file for every combination of product version and language
// e.g. `github-docs-dotcom-en.json` and `github-docs-2.14-ja.json` // e.g. `github-docs-dotcom-en.json` and `github-docs-2.14-ja.json`
module.exports = async function syncAlgoliaIndices (opts = {}) { module.exports = async function syncSearchIndexes (opts = {}) {
if (opts.dryRun) { if (opts.dryRun) {
console.log('This is a dry run! The script will build the indices locally but not upload anything.\n') console.log('This is a dry run! The script will build the indices locally but not upload anything.\n')
rimraf(cacheDir) rimraf(cacheDir)
@@ -60,11 +63,11 @@ module.exports = async function syncAlgoliaIndices (opts = {}) {
: allVersions[pageVersion].miscBaseName : allVersions[pageVersion].miscBaseName
// github-docs-dotcom-en, github-docs-2.22-en // github-docs-dotcom-en, github-docs-2.22-en
const indexName = `${Index.namePrefix}-${indexVersion}-${languageCode}` const indexName = `${namePrefix}-${indexVersion}-${languageCode}`
// The page version will be the new version, e.g., free-pro-team@latest, enterprise-server@2.22 // The page version will be the new version, e.g., free-pro-team@latest, enterprise-server@2.22
const records = await buildRecords(indexName, indexablePages, pageVersion, languageCode) const records = await buildRecords(indexName, indexablePages, pageVersion, languageCode)
const index = new Index(indexName, records) const index = new AlgoliaIndex(indexName, records)
if (opts.dryRun) { if (opts.dryRun) {
const cacheFile = path.join(cacheDir, `${indexName}.json`) const cacheFile = path.join(cacheDir, `${indexName}.json`)
@@ -87,7 +90,7 @@ module.exports = async function syncAlgoliaIndices (opts = {}) {
) )
if (!process.env.CI) { if (!process.env.CI) {
console.log(chalk.green(`\nCached remote index names in ${path.relative(process.cwd(), cachedIndexNamesFile)}`)) console.log(chalk.green(`\nCached index names in ${path.relative(process.cwd(), cachedIndexNamesFile)}`))
console.log(chalk.green('(If this file has any changes, please commit them)')) console.log(chalk.green('(If this file has any changes, please commit them)'))
} }

View File

@@ -0,0 +1,47 @@
const assert = require('assert')
const { isArray, isString, inRange } = require('lodash')
const isURL = require('is-url')
const countArrayValues = require('count-array-values')
const { maxRecordLength } = require('./config')
module.exports = function validateRecords (name, records) {
assert(isString(name) && name.length, '`name` is required')
assert(isArray(records) && records.length, '`records` must be a non-empty array')
// each ID is unique
const objectIDs = records.map(record => record.objectID)
const dupes = countArrayValues(objectIDs)
.filter(({ value, count }) => count > 1)
.map(({ value }) => value)
assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`)
records.forEach(record => {
assert(
isString(record.objectID) && record.objectID.length,
`objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}`
)
assert(
isString(record.title) && record.title.length,
`title must be a string. received: ${record.title}, ${JSON.stringify(record)}`
)
assert(
isURL(record.url),
`url must be a fully qualified URL. received: ${record.url}, ${JSON.stringify(record)}`
)
assert(
inRange(record.customRanking, 0, 4),
`customRanking must be an in-range number. received: ${record.customRanking}, (record: ${record.url})`
)
const recordLength = JSON.stringify(record).length
assert(
recordLength <= maxRecordLength,
`record ${record.url} is too long! ${recordLength} (max: ${maxRecordLength})`
)
})
return true
}

13
lib/search/versions.js Normal file
View File

@@ -0,0 +1,13 @@
const allVersions = require('../all-versions')
// The set of version identifiers derived from every entry in all-versions.
const searchVersions = new Set()

for (const version of Object.values(allVersions)) {
  if (version.plan === 'enterprise-server') {
    // if GHES, resolves to the release number like 2.21, 2.22, etc.
    searchVersions.add(version.currentRelease)
  } else {
    // if FPT, resolves to 'dotcom'
    // if GHAE, resolves to 'ghae'
    searchVersions.add(version.miscBaseName)
  }
}

module.exports = searchVersions

View File

@@ -66,6 +66,7 @@ module.exports = function (app) {
app.use('/public', express.static('data/graphql')) app.use('/public', express.static('data/graphql'))
app.use('/events', require('./events')) app.use('/events', require('./events'))
app.use('/csrf', require('./csrf-route')) app.use('/csrf', require('./csrf-route'))
app.use('/search', require('./search'))
app.use(require('./archived-enterprise-versions')) app.use(require('./archived-enterprise-versions'))
app.use(require('./robots')) app.use(require('./robots'))
app.use(/(\/.*)?\/early-access$/, require('./contextualizers/early-access-links')) app.use(/(\/.*)?\/early-access$/, require('./contextualizers/early-access-links'))

57
middleware/search.js Normal file
View File

@@ -0,0 +1,57 @@
const express = require('express')
const algoliasearch = require('algoliasearch')
const { namePrefix } = require('../lib/search/config')
const languages = new Set(Object.keys(require('../lib/languages')))
const versions = require('../lib/search/versions')
const router = express.Router()
// Algolia client created once at module load and reused for every search request.
// https://www.algolia.com/apps/ZI5KPY1HBE/dashboard
// This API key is public. There's also a private API key for writing to the Algolia API
const searchClient = algoliasearch('ZI5KPY1HBE', '685df617246c3a10abba589b4599288f')
// Runs `query` against the Algolia index for one version/language pair and
// returns the hits trimmed down to the fields this endpoint exposes.
//
// version, language - combined with `namePrefix` into the index name
// query             - the search text handed to Algolia
// limit             - forwarded to Algolia as `hitsPerPage`
//
// Resolves to an array of { url, breadcrumbs, heading, title, content }.
// Everything except `url` is the highlighted value reported by Algolia; a
// field is `undefined` (and so omitted from JSON) when the hit has no
// highlight entry for it. Rejects if the Algolia request fails.
async function loadAlgoliaResults ({ version, language, query, limit }) {
  const indexName = `${namePrefix}-${version}-${language}`
  const index = searchClient.initIndex(indexName)

  // allows "phrase queries" and "prohibit operator"
  // https://www.algolia.com/doc/api-reference/api-parameters/advancedSyntax/
  const { hits } = await index.search(query, {
    hitsPerPage: limit,
    advancedSyntax: true
  })

  // Read a highlighted attribute without assuming it is present, so that a
  // single hit lacking one attribute cannot throw and fail the whole request.
  // NOTE(review): presumably some indexed records have no `heading` (or other
  // attribute) at all — confirm against the record builder.
  const highlighted = (hit, attribute) => {
    const entry = hit._highlightResult && hit._highlightResult[attribute]
    return entry ? entry.value : undefined
  }

  return hits.map(hit => ({
    url: hit.url,
    breadcrumbs: highlighted(hit, 'breadcrumbs'),
    heading: highlighted(hit, 'heading'),
    title: highlighted(hit, 'title'),
    content: highlighted(hit, 'content')
  }))
}
// GET handler for the router root.
//
// Query string parameters:
//   query    - search text; an empty/missing query yields 200 with []
//   version  - must be a member of `versions`, otherwise 400 with []
//   language - must be a member of `languages`, otherwise 400 with []
//   limit    - optional number of results, defaults to 10 and is capped at 100
//
// Responds with a JSON array of results. Responses are never cached.
router.get('/', async (req, res) => {
  res.set({
    'surrogate-control': 'private, no-store',
    'cache-control': 'private, no-store'
  })

  const { query, version, language } = req.query

  // A missing, non-numeric, zero or negative limit falls back to the default
  // so that an invalid page size is never forwarded to the search backend.
  const requestedLimit = parseInt(req.query.limit, 10)
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 100) : 10

  if (!versions.has(version) || !languages.has(language)) {
    return res.status(400).json([])
  }

  if (!query) {
    return res.status(200).json([])
  }

  // Query string values are user-controlled: `?query[]=a` or `?query[a]=b`
  // arrive as an array or object rather than a string. Reject those up front.
  if (typeof query !== 'string') {
    return res.status(400).json([])
  }

  try {
    const results = await loadAlgoliaResults({ version, language, query, limit })
    return res.status(200).json(results)
  } catch (err) {
    // Any failure while loading results is logged and reported as an empty 400
    console.error(err)
    return res.status(400).json([])
  }
})
module.exports = router

View File

@@ -57,7 +57,6 @@
"html-truncate": "^1.2.2", "html-truncate": "^1.2.2",
"hubdown": "^2.6.0", "hubdown": "^2.6.0",
"imurmurhash": "^0.1.4", "imurmurhash": "^0.1.4",
"instantsearch.js": "^4.8.2",
"ioredis": "^4.19.4", "ioredis": "^4.19.4",
"ioredis-mock": "^5.2.0", "ioredis-mock": "^5.2.0",
"is-url": "^1.2.4", "is-url": "^1.2.4",
@@ -166,7 +165,7 @@
"sync-search": "start-server-and-test sync-search-server 4002 sync-search-indices", "sync-search": "start-server-and-test sync-search-server 4002 sync-search-indices",
"sync-search-dry-run": "DRY_RUN=1 npm run sync-search", "sync-search-dry-run": "DRY_RUN=1 npm run sync-search",
"sync-search-server": "cross-env NODE_ENV=production PORT=4002 node server.js", "sync-search-server": "cross-env NODE_ENV=production PORT=4002 node server.js",
"sync-search-indices": "script/sync-algolia-search-indices.js", "sync-search-indices": "script/sync-search-indices.js",
"test-watch": "jest --watch --notify --notifyMode=change --coverage", "test-watch": "jest --watch --notify --notifyMode=change --coverage",
"check-deps": "node script/check-deps.js", "check-deps": "node script/check-deps.js",
"prevent-pushes-to-main": "node script/prevent-pushes-to-main.js", "prevent-pushes-to-main": "node script/prevent-pushes-to-main.js",

View File

@@ -32,7 +32,6 @@ const main = async () => {
'@babel/*', '@babel/*',
'babel-preset-env', 'babel-preset-env',
'@primer/*', '@primer/*',
'instantsearch.js',
'querystring', 'querystring',
'pa11y-ci', 'pa11y-ci',
'sass', 'sass',

View File

@@ -2,8 +2,8 @@
// [start-readme] // [start-readme]
// //
// This script is run automatically via GitHub Actions on every push to `master` to generate searchable data // This script is run automatically via GitHub Actions on every push to `main` to generate searchable data.
// and upload it to our Algolia account. It can also be run manually. For more info see [contributing/search.md](contributing/search.md) // It can also be run manually. For more info see [contributing/search.md](contributing/search.md)
// //
// [end-readme] // [end-readme]
@@ -12,7 +12,7 @@ require('make-promises-safe')
main() main()
async function main () { async function main () {
const sync = require('../lib/algolia/sync') const sync = require('../lib/search/sync')
const opts = { const opts = {
dryRun: 'DRY_RUN' in process.env, dryRun: 'DRY_RUN' in process.env,
language: process.env.LANGUAGE, language: process.env.LANGUAGE,

View File

@@ -3,7 +3,7 @@
/* Global styles /* Global styles
Gets applied to both the search input on homepage and in the header nav Gets applied to both the search input on homepage and in the header nav
Form and inputs using .ais- prefix gets added by Algolia InstantSearch.js */ Form and inputs using .ais- prefix gets added by search.js */
.ais-SearchBox { .ais-SearchBox {
position: relative; position: relative;
} }

View File

@@ -42,84 +42,45 @@ describe('algolia browser search', () => {
}) })
it('sends the correct data to algolia for Enterprise Server', async () => { it('sends the correct data to algolia for Enterprise Server', async () => {
expect.assertions(12) // 3 assertions x 4 letters ('test') expect.assertions(2)
const newPage = await browser.newPage() const newPage = await browser.newPage()
await newPage.goto('http://localhost:4001/ja/enterprise/2.22/admin/installation') await newPage.goto('http://localhost:4001/ja/enterprise/2.22/admin/installation')
await newPage.setRequestInterception(true) await newPage.setRequestInterception(true)
newPage.on('request', interceptedRequest => { newPage.on('request', interceptedRequest => {
if (interceptedRequest.method() === 'POST' && /algolia/i.test(interceptedRequest.url())) { if (interceptedRequest.method() === 'GET' && /search/i.test(interceptedRequest.url())) {
const data = JSON.parse(interceptedRequest.postData()) const { version, language } = querystring.parse(interceptedRequest.url())
const { indexName, params } = data.requests[0] expect(version).toBe('2.22')
const parsedParams = querystring.parse(params) expect(language).toBe('ja')
const analyticsTags = JSON.parse(parsedParams.analyticsTags)
expect(indexName).toBe('github-docs-2.22-ja')
expect(analyticsTags).toHaveLength(2)
// browser tests are run against production build, so we are expecting env:production
expect(analyticsTags).toEqual(expect.arrayContaining(['site:docs.github.com', 'env:production']))
} }
interceptedRequest.continue() interceptedRequest.continue()
}) })
await newPage.click('#search-input-container input[type="search"]') await newPage.click('#search-input-container input[type="search"]')
await newPage.type('#search-input-container input[type="search"]', 'test') await newPage.type('#search-input-container input[type="search"]', 'test')
await newPage.waitForSelector('.search-result')
}) })
it('sends the correct data to algolia for GHAE', async () => { it('sends the correct data to algolia for GHAE', async () => {
expect.assertions(12) // 3 assertions x 4 letters ('test') expect.assertions(2)
const newPage = await browser.newPage() const newPage = await browser.newPage()
await newPage.goto('http://localhost:4001/en/github-ae@latest/admin/overview') await newPage.goto('http://localhost:4001/en/github-ae@latest/admin/overview')
await newPage.setRequestInterception(true) await newPage.setRequestInterception(true)
newPage.on('request', interceptedRequest => { newPage.on('request', interceptedRequest => {
if (interceptedRequest.method() === 'POST' && /algolia/i.test(interceptedRequest.url())) { if (interceptedRequest.method() === 'GET' && /search/i.test(interceptedRequest.url())) {
const data = JSON.parse(interceptedRequest.postData()) const { version, language } = querystring.parse(interceptedRequest.url())
const { indexName, params } = data.requests[0] expect(version).toBe('ghae')
const parsedParams = querystring.parse(params) expect(language).toBe('en')
const analyticsTags = JSON.parse(parsedParams.analyticsTags)
expect(indexName).toBe('github-docs-ghae-en')
expect(analyticsTags).toHaveLength(2)
// browser tests are run against production build, so we are expecting env:production
expect(analyticsTags).toEqual(expect.arrayContaining(['site:docs.github.com', 'env:production']))
} }
interceptedRequest.continue() interceptedRequest.continue()
}) })
await newPage.click('#search-input-container input[type="search"]') await newPage.click('#search-input-container input[type="search"]')
await newPage.type('#search-input-container input[type="search"]', 'test') await newPage.type('#search-input-container input[type="search"]', 'test')
}) await newPage.waitForSelector('.search-result')
it('removes `algolia-query` query param after page load', async () => {
await page.goto('http://localhost:4001/en?algolia-query=helpme')
// check that the query is still present at page load
let location = await getLocationObject(page)
expect(location.search).toBe('?algolia-query=helpme')
// query removal is in a setInterval, so wait a bit
await sleep(1000)
// check that the query has been removed after a bit
location = await getLocationObject(page)
expect(location.search).toBe('')
})
it('does not remove hash when removing `algolia-query` query', async () => {
await page.goto('http://localhost:4001/en?algolia-query=helpme#some-header')
// check that the query is still present at page load
let location = await getLocationObject(page)
expect(location.search).toBe('?algolia-query=helpme')
// query removal is in a setInterval, so wait a bit
await sleep(1000)
// check that the query has been removed after a bit
location = await getLocationObject(page)
expect(location.search).toBe('')
expect(location.hash).toBe('#some-header')
}) })
}) })
@@ -166,13 +127,6 @@ describe('csrf meta', () => {
}) })
}) })
async function getLocationObject (page) {
const location = await page.evaluate(() => {
return Promise.resolve(JSON.stringify(window.location, null, 2))
})
return JSON.parse(location)
}
describe('platform specific content', () => { describe('platform specific content', () => {
// from tests/javascripts/user-agent.js // from tests/javascripts/user-agent.js
const userAgents = [ const userAgents = [

View File

@@ -1,14 +1,14 @@
const { dates, supported } = require('../../lib/enterprise-server-releases') const { dates, supported } = require('../../lib/enterprise-server-releases')
const languageCodes = Object.keys(require('../../lib/languages')) const languageCodes = Object.keys(require('../../lib/languages'))
const AlgoliaIndex = require('../../lib/algolia/search-index') const { namePrefix } = require('../../lib/search/config')
const remoteIndexNames = require('../../lib/algolia/cached-index-names.json') const remoteIndexNames = require('../../lib/search/cached-index-names.json')
describe('algolia', () => { describe('algolia', () => {
test('has remote indexNames in every language for every supported GHE version', () => { test('has remote indexNames in every language for every supported GHE version', () => {
expect(supported.length).toBeGreaterThan(1) expect(supported.length).toBeGreaterThan(1)
supported.forEach(version => { supported.forEach(version => {
languageCodes.forEach(languageCode => { languageCodes.forEach(languageCode => {
const indexName = `${AlgoliaIndex.namePrefix}-${version}-${languageCode}` const indexName = `${namePrefix}-${version}-${languageCode}`
// workaround for GHES release branches not in production yet // workaround for GHES release branches not in production yet
if (!remoteIndexNames.includes(indexName)) { if (!remoteIndexNames.includes(indexName)) {
@@ -28,7 +28,7 @@ describe('algolia', () => {
test('has remote indexNames in every language for dotcom', async () => { test('has remote indexNames in every language for dotcom', async () => {
expect(languageCodes.length).toBeGreaterThan(0) expect(languageCodes.length).toBeGreaterThan(0)
languageCodes.forEach(languageCode => { languageCodes.forEach(languageCode => {
const indexName = `${AlgoliaIndex.namePrefix}-dotcom-${languageCode}` const indexName = `${namePrefix}-dotcom-${languageCode}`
expect(remoteIndexNames.includes(indexName)).toBe(true) expect(remoteIndexNames.includes(indexName)).toBe(true)
}) })
}) })

View File

@@ -1,13 +1,13 @@
const fs = require('fs') const fs = require('fs')
const path = require('path') const path = require('path')
const cheerio = require('cheerio') const cheerio = require('cheerio')
const parsePageSectionsIntoRecords = require('../../../lib/algolia/parse-page-sections-into-records') const parsePageSectionsIntoRecords = require('../../../lib/search/parse-page-sections-into-records')
const fixtures = { const fixtures = {
pageWithSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-with-sections.html'), 'utf8'), pageWithSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-with-sections.html'), 'utf8'),
pageWithoutSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-without-sections.html'), 'utf8') pageWithoutSections: fs.readFileSync(path.join(__dirname, 'fixtures/page-without-sections.html'), 'utf8')
} }
describe('algolia parsePageSectionsIntoRecords module', () => { describe('search parsePageSectionsIntoRecords module', () => {
test('works for pages with sections', () => { test('works for pages with sections', () => {
const html = fixtures.pageWithSections const html = fixtures.pageWithSections
const $ = cheerio.load(html) const $ = cheerio.load(html)

View File

@@ -1,6 +1,6 @@
const rank = require('../../../lib/algolia/rank') const rank = require('../../../lib/search/rank')
test('algolia custom rankings', () => { test('search custom rankings', () => {
const expectedRankings = [ const expectedRankings = [
['https://docs.github.com/en/github/actions', 3], ['https://docs.github.com/en/github/actions', 3],
['https://docs.github.com/en/rest/reference', 2], ['https://docs.github.com/en/rest/reference', 2],

View File

@@ -86,12 +86,5 @@ module.exports = {
] ]
}), }),
new EnvironmentPlugin(['NODE_ENV']) new EnvironmentPlugin(['NODE_ENV'])
], ]
resolve: {
alias: {
// Hogan uses `new Function` which breaks content security policy
// Turns out, we aren't even using it anyways!
'hogan.js': path.resolve(__dirname, 'javascripts/fake-hogan.js')
}
}
} }