1
0
mirror of synced 2026-01-28 18:03:28 -05:00

Merge pull request #22006 from github/repo-sync

repo sync
This commit is contained in:
Octomerger Bot
2022-11-14 05:18:31 -08:00
committed by GitHub
7 changed files with 233 additions and 74 deletions

View File

@@ -16,24 +16,33 @@ import { Link } from 'components/Link'
import styles from './Search.module.scss'
// The search endpoint used prior to using /api/search/legacy was
// just /search, which used middleware/search.js. We are leaving that
// middleware in tact to allow folks that previously used the /search
// endpoint to continue doing so. But, we changed the endpoint used by
// the search input on docs.github.com to use the new /api/search/legacy
// endpoint.
// Eventually, we will deprecate the /search and /api/search/legacy
// endpoints and use the /api/search/v1 endpoint, which has
// a different response JSON format.
const SEARCH_API_ENDPOINT = '/api/search/legacy'
const SEARCH_API_ENDPOINT = '/api/search/v1'
type SearchResult = {
type Hit = {
id: string
url: string
breadcrumbs: string
title: string
content: string
score: number
popularity: number
breadcrumbs: string
highlights: {
title: Array<string>
content: Array<string>
}
score?: number
popularity?: number
es_url?: string
}
// Note, the JSON will contain other keys too, but since we don't use
// them in this component, we don't need to specify them.
type Meta = {
found: {
value: number
}
}
type Data = {
hits: Hit[]
meta: Meta
}
type Props = {
@@ -73,10 +82,14 @@ export function Search({
language,
version,
query,
// In its current state, can't with confidence know if the user
// initiated a search because of the input debounce or if the
// user has finished typing.
autocomplete: 'true',
})}`
: null
const { data: results, error: searchError } = useSWR<SearchResult[], Error>(
const { data, error: searchError } = useSWR<Data, Error>(
fetchURL,
async (url: string) => {
const response = await fetch(url)
@@ -102,14 +115,14 @@ export function Search({
}
)
const [previousResults, setPreviousResults] = useState<SearchResult[] | undefined>()
const [previousData, setPreviousData] = useState<Data | undefined>()
useEffect(() => {
if (results) {
setPreviousResults(results)
if (data) {
setPreviousData(data)
} else if (!query) {
setPreviousResults(undefined)
setPreviousData(undefined)
}
}, [results, query])
}, [data, query])
// The `isLoading` boolean will become false every time the useSWR hook
// fires off a new XHR. So it toggles from false/true often.
@@ -124,7 +137,7 @@ export function Search({
// mean saying "Loading..." is a lie!
// That's why we combine them into a final one. We're basically doing
// this to favor *NOT* saying "Loading...".
const isLoadingRaw = Boolean(query && !results && !searchError)
const isLoadingRaw = Boolean(query && !data && !searchError)
const [isLoadingDebounced] = useDebounce<boolean>(isLoadingRaw, 500)
const isLoading = isLoadingRaw && isLoadingDebounced
@@ -216,7 +229,7 @@ export function Search({
isHeaderSearch={isHeaderSearch}
isMobileSearch={isMobileSearch}
isLoading={isLoading}
results={previousResults}
results={previousData}
closeSearch={closeSearch}
debug={debug}
query={query}
@@ -342,7 +355,7 @@ function ShowSearchResults({
isHeaderSearch: boolean
isMobileSearch: boolean
isLoading: boolean
results: SearchResult[] | undefined
results: Data | undefined
closeSearch: () => void
debug: boolean
query: string
@@ -461,11 +474,33 @@ function ShowSearchResults({
{t('search_results_for')}: {query}
</h1>
<p className="ml-4 mb-4 text-normal f5">
{t('matches_displayed')}: {results.length === 0 ? t('no_results') : results.length}
{results.meta.found.value === 0 ? (
t('no_results')
) : (
<span>
{t('matches_found')}: {results.meta.found.value.toLocaleString()}
</span>
)}
.{' '}
{results.meta.found.value > results.hits.length && (
<span>
{t('matches_displayed')}:{' '}
{results.meta.found.value === 0 ? t('no_results') : results.hits.length}
</span>
)}
</p>
<ActionList variant="full">
{results.map(({ url, breadcrumbs, title, content, score, popularity }, index) => {
{results.hits.map((hit, index) => {
const { url, breadcrumbs, title, highlights, score, popularity } = hit
const contentHTML =
highlights.content && highlights.content.length > 0
? highlights.content.join('<br>')
: ''
const titleHTML =
highlights.title && highlights.title.length > 0 ? highlights.title[0] : title
return (
<ActionList.Item className="width-full" key={url}>
<Link
@@ -476,8 +511,8 @@ function ShowSearchResults({
type: EventType.searchResult,
search_result_query: Array.isArray(query) ? query[0] : query,
search_result_index: index,
search_result_total: results.length,
search_result_rank: (results.length - index) / results.length,
search_result_total: results.hits.length,
search_result_rank: (results.hits.length - index) / results.hits.length,
search_result_url: url,
})
}}
@@ -487,17 +522,10 @@ function ShowSearchResults({
className={cx('list-style-none', styles.resultsContainer)}
>
<div className={cx('py-2 px-3')}>
{/* Breadcrumbs in search records don't include the page title. These fields may contain <mark> elements that we need to render */}
<Label size="small" variant="accent">
{breadcrumbs.length === 0
? title.replace(/<\/?[^>]+(>|$)|(\/)/g, '')
: breadcrumbs
.split(' / ')
.slice(0, 1)
.join(' ')
.replace(/<\/?[^>]+(>|$)|(\/)/g, '')}
{breadcrumbs ? breadcrumbs.split(' / ')[0] : title}
</Label>
{debug && (
{debug && score !== undefined && popularity !== undefined && (
<small className="float-right">
score: {score.toFixed(4)} popularity: {popularity.toFixed(4)}
</small>
@@ -505,13 +533,13 @@ function ShowSearchResults({
<h2
className={cx('mt-2 text-normal f3 d-block')}
dangerouslySetInnerHTML={{
__html: title,
__html: titleHTML,
}}
/>
<div
className={cx(styles.searchResultContent, 'mt-1 d-block overflow-hidden')}
style={{ maxHeight: '2.5rem' }}
dangerouslySetInnerHTML={{ __html: content }}
dangerouslySetInnerHTML={{ __html: contentHTML }}
/>
<div
className={'d-block mt-2 opacity-70 text-small'}

View File

@@ -32,6 +32,7 @@ search:
no_results: No results found
search_results_for: Search results for
no_content: No content
matches_found: Results found
matches_displayed: Matches displayed
search_error: An error occurred trying to perform the search.
description: Enter a search term to find it in the GitHub Documentation.

View File

@@ -1,5 +1,8 @@
import { Client } from '@elastic/elasticsearch'
export const POSSIBLE_HIGHLIGHT_FIELDS = ['title', 'content', 'headings']
export const DEFAULT_HIGHLIGHT_FIELDS = ['title', 'content']
const ELASTICSEARCH_URL = process.env.ELASTICSEARCH_URL
const isDevMode = process.env.NODE_ENV !== 'production'
@@ -37,6 +40,7 @@ export async function getSearchResults({
topics,
includeTopics,
usePrefixSearch,
highlights,
}) {
if (topics && !Array.isArray(topics)) {
throw new Error("'topics' has to be an array")
@@ -72,7 +76,8 @@ export async function getSearchResults({
matchQuery.bool.filter = topicsFilter
}
const highlight = getHighlightConfiguration(query)
const highlightFields = highlights || DEFAULT_HIGHLIGHT_FIELDS
const highlight = getHighlightConfiguration(query, highlightFields)
const searchQuery = {
highlight,
@@ -141,7 +146,7 @@ export async function getSearchResults({
// const hitsAll = result.hits // ES >7.11
const hitsAll = result.body // ES <=7.11
const hits = getHits(hitsAll.hits.hits, { indexName, debug, includeTopics })
const hits = getHits(hitsAll.hits.hits, { indexName, debug, includeTopics, highlightFields })
const t1 = new Date()
const meta = {
@@ -345,14 +350,26 @@ function getMatchQueries(query, { usePrefixSearch, fuzzy }) {
return matchQueries
}
function getHits(hits, { indexName, debug, includeTopics }) {
function getHits(hits, { indexName, debug, includeTopics, highlightFields }) {
return hits.map((hit) => {
// Return `hit.highlights[...]` based on the highlight fields requested.
// So if you searched with `&highlights=headings&highlights=content`
// this will become:
// {
// content: [...],
// headings: [...]
// }
// even if there was a match on 'title'.
const hitHighlights = Object.fromEntries(
highlightFields.map((key) => [key, (hit.highlight && hit.highlight[key]) || []])
)
const result = {
id: hit._id,
url: hit._source.url,
title: hit._source.title,
breadcrumbs: hit._source.breadcrumbs,
highlights: hit.highlight || {},
highlights: hitHighlights,
}
if (includeTopics) {
result.topics = hit._source.topics || []
@@ -374,31 +391,38 @@ function getHits(hits, { indexName, debug, includeTopics }) {
// of highlights of content under each title. If we feel it shows too
// many highlights in the search result UI, we can come back here
// and change it to something more appropriate.
function getHighlightConfiguration(query) {
return {
pre_tags: ['<mark>'],
post_tags: ['</mark>'],
fields: {
title: {
fragment_size: 200,
number_of_fragments: 1,
},
headings: { fragment_size: 150, number_of_fragments: 2 },
// The 'no_match_size' is so we can display *something* for the
// preview if there was no highlight match at all within the content.
content: {
fragment_size: 150,
number_of_fragments: 3,
no_match_size: 150,
function getHighlightConfiguration(query, highlights) {
const fields = {}
if (highlights.includes('title')) {
fields.title = {
fragment_size: 200,
number_of_fragments: 1,
}
}
if (highlights.includes('headings')) {
fields.headings = { fragment_size: 150, number_of_fragments: 2 }
}
if (highlights.includes('content')) {
// The 'no_match_size' is so we can display *something* for the
// preview if there was no highlight match at all within the content.
fields.content = {
fragment_size: 150,
number_of_fragments: 3,
no_match_size: 150,
highlight_query: {
match_phrase_prefix: {
content: {
query,
},
highlight_query: {
match_phrase_prefix: {
content: {
query,
},
},
},
},
}
}
return {
pre_tags: ['<mark>'],
post_tags: ['</mark>'],
fields,
}
}

View File

@@ -7,7 +7,11 @@ import { allVersions } from '../../lib/all-versions.js'
import statsd from '../../lib/statsd.js'
import { defaultCacheControl } from '../cache-control.js'
import catchMiddlewareError from '../catch-middleware-error.js'
import { getSearchResults } from './es-search.js'
import {
getSearchResults,
POSSIBLE_HIGHLIGHT_FIELDS,
DEFAULT_HIGHLIGHT_FIELDS,
} from './es-search.js'
// Used by the legacy search
const versions = new Set(Object.values(searchVersions))
@@ -182,7 +186,21 @@ const validationMiddleware = (req, res, next) => {
validate: (v) => v >= 1 && v <= 10,
},
{ key: 'sort', default_: DEFAULT_SORT, validate: (v) => POSSIBLE_SORTS.includes(v) },
{ key: 'debug', default_: Boolean(process.env.NODE_ENV === 'development' || req.query.debug) },
{
key: 'highlights',
default_: DEFAULT_HIGHLIGHT_FIELDS,
cast: (v) => (Array.isArray(v) ? v : [v]),
validate: (v) => {
for (const highlight of v) {
if (!POSSIBLE_HIGHLIGHT_FIELDS.includes(highlight)) {
throw new ValidationError(`highlight value '${highlight}' is not valid`)
}
}
return true
},
},
{ key: 'autocomplete', default_: false, cast: toBoolean },
{ key: 'debug', default_: process.env.NODE_ENV === 'development', cast: toBoolean },
]
const search = {}
@@ -221,11 +239,16 @@ const validationMiddleware = (req, res, next) => {
return next()
}
function toBoolean(value) {
if (value === 'true' || value === '1') return true
return false
}
router.get(
'/v1',
validationMiddleware,
catchMiddlewareError(async function search(req, res) {
const { indexName, query, page, size, debug, sort } = req.search
const { indexName, query, autocomplete, page, size, debug, sort, highlights } = req.search
// The getSearchResults() function is a mix of preparing the search,
// sending & receiving it, and post-processing the response from the
@@ -236,7 +259,16 @@ router.get(
const tags = ['version:v1', `indexName:${indexName}`]
const timed = statsd.asyncTimer(getSearchResults, 'api.search', tags)
const options = { indexName, query, page, size, debug, sort }
const options = {
indexName,
query,
page,
size,
debug,
sort,
highlights,
usePrefixSearch: autocomplete,
}
try {
const { meta, hits } = await timed(options)

View File

@@ -91,7 +91,7 @@ describe('browser search', () => {
newPage.on('request', (interceptedRequest) => {
if (
interceptedRequest.method() === 'GET' &&
/api\/search\/legacy\?/i.test(interceptedRequest.url())
/api\/search\/v1\?/i.test(interceptedRequest.url())
) {
const { searchParams } = new URL(interceptedRequest.url())
expect(searchParams.get('version')).toBe('dotcom')
@@ -119,7 +119,7 @@ describe('browser search', () => {
newPage.on('request', (interceptedRequest) => {
if (
interceptedRequest.method() === 'GET' &&
/api\/search\/legacy\?/i.test(interceptedRequest.url())
/api\/search\/v1\?/i.test(interceptedRequest.url())
) {
const { searchParams } = new URL(interceptedRequest.url())
expect(searchParams.get('version')).toBe('ghae')

View File

@@ -57,8 +57,11 @@ describeIfElasticsearchURL('search v1 middleware', () => {
expect(hit.url).toBe('/en/foo')
expect(hit.title).toBe('Foo')
expect(hit.breadcrumbs).toBe('fooing')
// By default, 'title' and 'content' is included in highlights,
// but not 'headings'
expect(hit.highlights.title[0]).toBe('<mark>Foo</mark>')
expect(hit.highlights.content[0]).toMatch('<mark>foo</mark>')
expect(hit.highlights.headings).toBeUndefined()
// Check that it can be cached at the CDN
expect(res.headers['set-cookie']).toBeUndefined()
@@ -83,6 +86,36 @@ describeIfElasticsearchURL('search v1 middleware', () => {
expect(hit.es_url).toBeTruthy()
})
test('search with and without autocomplete on', async () => {
// *Without* autocomplete=true
{
const sp = new URLSearchParams()
sp.set('query', 'sill')
const res = await get('/api/search/v1?' + sp)
expect(res.statusCode).toBe(200)
const results = JSON.parse(res.text)
// Fixtures contains no word called 'sill'. It does contain the term
// 'silly' which, in English, becomes 'silli` when stemmed.
// Because we don't use `&autocomplete=true` this time, we expect
// to find nothing.
expect(results.meta.found.value).toBe(0)
}
// *With* autocomplete=true
{
const sp = new URLSearchParams()
sp.set('query', 'sill')
sp.set('autocomplete', 'true')
const res = await get('/api/search/v1?' + sp)
expect(res.statusCode).toBe(200)
const results = JSON.parse(res.text)
expect(results.meta.found.value).toBeGreaterThanOrEqual(1)
const hit = results.hits[0]
const contentHighlights = hit.highlights.content
expect(contentHighlights[0]).toMatch('<mark>silly</mark>')
}
})
test('find nothing', async () => {
const sp = new URLSearchParams()
sp.set('query', 'xojixjoiwejhfoiuwehjfioweufhj')
@@ -93,6 +126,38 @@ describeIfElasticsearchURL('search v1 middleware', () => {
expect(results.meta.found.value).toBe(0)
})
test('configurable highlights', async () => {
const sp = new URLSearchParams()
sp.set('query', 'introduction heading')
sp.append('highlights', 'headings')
sp.append('highlights', 'content')
const res = await get('/api/search/v1?' + sp)
expect(res.statusCode).toBe(200)
const results = JSON.parse(res.text)
expect(results.meta.found.value).toBeGreaterThanOrEqual(1)
for (const hit of results.hits) {
expect(hit.highlights.title).toBeFalsy()
expect(hit.highlights.headings).toBeTruthy()
expect(hit.highlights.content).toBeTruthy()
}
})
test('highlights keys matches highlights configuration', async () => {
const sp = new URLSearchParams()
// This will match because it's in the 'content' but not in 'headings'
sp.set('query', 'Fact of life')
sp.set('highlights', 'headings')
const res = await get('/api/search/v1?' + sp)
expect(res.statusCode).toBe(200)
const results = JSON.parse(res.text)
expect(results.meta.found.value).toBeGreaterThanOrEqual(1)
for (const hit of results.hits) {
expect(hit.highlights.headings).toBeTruthy()
expect(hit.highlights.title).toBeFalsy()
expect(hit.highlights.content).toBeFalsy()
}
})
test('version can be aliased', async () => {
const sp = new URLSearchParams()
sp.set('query', 'foo')
@@ -169,6 +234,15 @@ describeIfElasticsearchURL('search v1 middleware', () => {
expect(res.statusCode).toBe(400)
expect(JSON.parse(res.text).error).toMatch('sort')
}
// unrecognized highlights
{
const sp = new URLSearchParams()
sp.set('query', 'test')
sp.set('highlights', 'neverheardof')
const res = await get('/api/search/v1?' + sp)
expect(res.statusCode).toBe(400)
expect(JSON.parse(res.text).error).toMatch('neverheardof')
}
})
test('breadcrumbless records should always return a string', async () => {

View File

@@ -3,7 +3,7 @@
"objectID": "/en/foo",
"breadcrumbs": "fooing",
"title": "Foo",
"headings": "",
"headings": "Heading 1",
"content": "This is a fixture with the silly foo word.",
"topics": ["Test", "Fixture"],
"popularity": 0.5
@@ -12,7 +12,7 @@
"objectID": "/en/bar",
"breadcrumbs": "baring",
"title": "Bar",
"headings": "",
"headings": "Sub-heading This Sub-heading That",
"content": "Can't have foo if you don't also have bar.",
"topics": ["Test", "Fixture", "Get started"],
"popularity": 0.6
@@ -21,7 +21,7 @@
"objectID": "/en/bar",
"breadcrumbs": "",
"title": "Breadcrumbless",
"headings": "",
"headings": "Introduction",
"content": "Not all pages have a breadcrumb. Fact of life.",
"topics": [],
"popularity": 0.1