From a7ccfb8ce2d77fdf32ef41eb7f14651d403d5a64 Mon Sep 17 00:00:00 2001 From: Kevin Heis Date: Tue, 10 Jun 2025 12:05:16 -0700 Subject: [PATCH] Remove general autocomplete feature (#56000) --- .../workflows/index-autocomplete-search.yml | 9 +- package.json | 2 - src/search/README.md | 10 +- src/search/lib/elasticsearch-indexes.ts | 9 +- .../general-autocomplete.ts | 123 --------------- .../search-params-objects.ts | 9 +- src/search/lib/search-request-params/types.ts | 1 - src/search/middleware/search-routes.ts | 38 ----- src/search/scripts/index-test-fixtures.sh | 3 +- src/search/scripts/index/README.md | 7 +- src/search/scripts/index/index-cli.ts | 48 +----- .../index/lib/index-general-autocomplete.ts | 140 ----------------- src/search/scripts/index/utils/mappings.ts | 11 -- src/search/scripts/index/utils/settings.ts | 33 ---- .../tests/api-general-autocomplete-search.ts | 147 ------------------ src/search/types.ts | 2 +- 16 files changed, 14 insertions(+), 578 deletions(-) delete mode 100644 src/search/lib/get-elasticsearch-results/general-autocomplete.ts delete mode 100644 src/search/scripts/index/lib/index-general-autocomplete.ts delete mode 100644 src/search/tests/api-general-autocomplete-search.ts diff --git a/.github/workflows/index-autocomplete-search.yml b/.github/workflows/index-autocomplete-search.yml index 8d116a66ca..09375d0e7b 100644 --- a/.github/workflows/index-autocomplete-search.yml +++ b/.github/workflows/index-autocomplete-search.yml @@ -1,7 +1,7 @@ name: Index autocomplete search in Elasticsearch -# **What it does**: Indexes autocomplete data (general and AI search) into Elasticsearch. -# **Why we have it**: So we can power the APIs for autocomplete. +# **What it does**: Indexes AI search autocomplete data into Elasticsearch. +# **Why we have it**: So we can power the APIs for AI search autocomplete. 
# **Who does it impact**: docs-engineering on: @@ -40,11 +40,6 @@ jobs: if: ${{ github.event_name == 'pull_request' }} run: curl --fail --retry-connrefused --retry 5 -I http://localhost:9200 - - name: Run general auto-complete indexing - env: - ELASTICSEARCH_URL: ${{ github.event_name == 'pull_request' && 'http://localhost:9200' || secrets.ELASTICSEARCH_URL }} - run: npm run index-general-autocomplete -- docs-internal-data - - name: Run AI search auto-complete indexing env: ELASTICSEARCH_URL: ${{ github.event_name == 'pull_request' && 'http://localhost:9200' || secrets.ELASTICSEARCH_URL }} diff --git a/package.json b/package.json index ce09ea6255..bdb8eda6b9 100644 --- a/package.json +++ b/package.json @@ -49,9 +49,7 @@ "general-search-scrape-server": "cross-env NODE_ENV=production PORT=4002 MINIMAL_RENDER=true CHANGELOG_DISABLED=true tsx src/frame/server.ts", "ghes-release-scrape-with-server": "cross-env GHES_RELEASE=1 start-server-and-test general-search-scrape-server 4002 general-search-scrape", "general-search-scrape-with-server": "cross-env NODE_OPTIONS='--max_old_space_size=8192' start-server-and-test general-search-scrape-server 4002 general-search-scrape", - "index": "tsx src/search/scripts/index/index-cli autocomplete docs-internal-data", "index-ai-search-autocomplete": "tsx src/search/scripts/index/index-cli ai-search-autocomplete", - "index-general-autocomplete": "tsx src/search/scripts/index/index-cli general-autocomplete", "index-general-search": "tsx src/search/scripts/index/index-cli general-search", "index-test-fixtures": "./src/search/scripts/index-test-fixtures.sh", "labeler": "tsx .github/actions/labeler/labeler.ts", diff --git a/src/search/README.md b/src/search/README.md index e4035210b4..7ae3f25742 100644 --- a/src/search/README.md +++ b/src/search/README.md @@ -22,7 +22,7 @@ You can also query our search endpoint directly at: ## Types of search -Our backend currently supports 3 "types" of searching. 
+Our backend currently supports 2 "types" of searching. All searches accept a `query` param, e.g. `?query=how` and return results based on their type: @@ -30,11 +30,7 @@ All searches accept a `query` param, e.g. `?query=how` and return results based - Results: The pages of our sites that match the query, sorted by popularity - Example: Query = "clone" -> Results - Endpoint: `/api/search/v1` -2. **general autocomplete** - - Results: Potential terms that can be autocompleted from the query based on previous user searches - - Example: Query = "cl" -> A Result = "clone" - - Endpoint: `/api/search/autocomplete/v1` -3. **AI search autocomplete** +2. **AI search autocomplete** - Results: Human-readable full-sentence questions that best match the query. Questions are based on previous searches and popular pages - Example: Query = "How do I clone" -> A Result = "How do I clone a repository?" - Endpoint: `/api/search/ai-search-autocomplete/v1` @@ -66,7 +62,7 @@ The preferred way to build and sync the search indices is to do so via the [GitH ### Actions workflow files - [`.github/workflows/index-general-search.yml`](/.github/workflows/index-general-search.yml) - Populates search indices for **general search** using the `main` branch every four hours. Search indices are stored in an internal-only Elasticsearch instance. To run it manually, click "Run workflow" button in the Actions tab. -- [`.github/workflows/index-autocomplete-search.yml`](/.github/workflows/index-general-search.yml) - Populates search indices for both **general autocomplete** and **AI search autocomplete** using data from an internal repo. Runs daily. +- [`.github/workflows/index-autocomplete-search.yml`](/.github/workflows/index-autocomplete-search.yml) - Populates search indices for **AI search autocomplete** using data from an internal repo. Runs daily. 
### Notable code files and directories diff --git a/src/search/lib/elasticsearch-indexes.ts b/src/search/lib/elasticsearch-indexes.ts index 349215ecde..08f3ba4c52 100644 --- a/src/search/lib/elasticsearch-indexes.ts +++ b/src/search/lib/elasticsearch-indexes.ts @@ -15,10 +15,9 @@ export type SearchIndex = { /* Elasticsearch uses indexes to group categories of data - We currently have 3 top-level categories of indexes: + We currently have 2 top-level categories of indexes: 1. General search: This is populated using data from all of our Docs pages - 2. General autocomplete: This is populated using analytics search history in docs-internal-data - 3. AI autocomplete: This is populated with human-readable questions using a GPT query in docs-internal-data + 2. AI autocomplete: This is populated with human-readable questions using a GPT query in docs-internal-data This file is intended to be the source of truth for Docs Elasticsearch indexes. @@ -34,10 +33,6 @@ const indexes: SearchIndexes = { prefix, type: 'general-search', }, - generalAutocomplete: { - prefix, - type: 'general-autocomplete', - }, aiSearchAutocomplete: { prefix, type: 'ai-search-autocomplete', diff --git a/src/search/lib/get-elasticsearch-results/general-autocomplete.ts b/src/search/lib/get-elasticsearch-results/general-autocomplete.ts deleted file mode 100644 index 57ff42dd7f..0000000000 --- a/src/search/lib/get-elasticsearch-results/general-autocomplete.ts +++ /dev/null @@ -1,123 +0,0 @@ -import { Client } from '@elastic/elasticsearch' -import { getElasticsearchClient } from '@/search/lib/helpers/get-client' -import { getHighlightConfiguration } from '@/search/lib/get-elasticsearch-results/helpers/elasticsearch-highlight-config' - -import type { QueryDslQueryContainer, SearchTotalHits } from '@elastic/elasticsearch/lib/api/types' -import type { AutocompleteSearchResponse } from '@/search/types' -import type { - AutocompleteMatchQueriesOptions, - AutocompleteResultsArgs, - AutocompleteElasticsearchItem, 
-} from '@/search/lib/get-elasticsearch-results/types' - -// Query Elasticsearch for general autocomplete results -export async function getAutocompleteSearchResults({ - indexName, - query, - size, - debug = false, -}: AutocompleteResultsArgs): Promise { - const t0 = new Date() - const client = getElasticsearchClient() as Client - - let searchQuery: any = { - index: indexName, - size, - // Send absolutely minimal from Elasticsearch to here. Less data => faster. - _source_includes: ['term'], - } - - const trimmedQuery = query.trim() - // When the query is empty, return no results - if (trimmedQuery === '') { - return { - meta: { - found: { - value: 0, - relation: 'eq', - }, - took: { query_msec: 0, total_msec: new Date().getTime() - t0.getTime() }, - size, - }, - hits: [], - } - } else { - const matchQueries = getAutocompleteMatchQueries(trimmedQuery, { - fuzzy: { - minLength: 3, - maxLength: 20, - }, - }) - const matchQuery: QueryDslQueryContainer = { - bool: { - should: matchQueries, - }, - } - - searchQuery.query = matchQuery - searchQuery.highlight = getHighlightConfiguration(trimmedQuery, ['term']) - } - - const result = await client.search(searchQuery) - - const hitsAll = result.hits - const hits = hitsAll.hits.map((hit) => ({ - term: hit._source?.term, - highlights: (hit.highlight && hit.highlight.term) || [], - ...(debug && { - score: hit._score ?? 0.0, - es_url: - process.env.NODE_ENV !== 'production' - ? 
`http://localhost:9200/${indexName}/_doc/${hit._id}` - : '', - }), - })) - - return { - meta: { - found: hitsAll.total as SearchTotalHits, - took: { query_msec: result.took, total_msec: new Date().getTime() - t0.getTime() }, - size, - }, - hits, - } -} - -function getAutocompleteMatchQueries(query: string, { fuzzy }: AutocompleteMatchQueriesOptions) { - const BOOST_PHRASE = 4.0 - const BOOST_REGULAR = 2.0 - const BOOST_FUZZY = 0.1 - - const matchQueries: QueryDslQueryContainer[] = [] - const isMultiWordQuery = query.includes(' ') || query.includes('-') - - if (isMultiWordQuery) { - matchQueries.push({ - match_phrase_prefix: { - term: { - query, - boost: BOOST_PHRASE, - }, - }, - }) - } - - matchQueries.push({ - match_bool_prefix: { - term: { - query, - boost: BOOST_REGULAR, - }, - }, - }) - - if (query.length > fuzzy.minLength && query.length < fuzzy.maxLength) { - matchQueries.push({ - fuzzy: { - term: { value: query, boost: BOOST_FUZZY, fuzziness: 'AUTO' }, - }, - }) - } - - return matchQueries -} diff --git a/src/search/lib/search-request-params/search-params-objects.ts b/src/search/lib/search-request-params/search-params-objects.ts index 356e0ff9c9..245fcc0966 100644 --- a/src/search/lib/search-request-params/search-params-objects.ts +++ b/src/search/lib/search-request-params/search-params-objects.ts @@ -11,9 +11,7 @@ import type { SearchRequestQueryParams } from '@/search/lib/search-request-param // Entry to this file, returns the query parameters to expect based on the type of search request export function getSearchRequestParamsObject(type: SearchTypes): SearchRequestQueryParams[] { - if (type === 'generalAutocomplete') { - return AUTOCOMPLETE_PARAMS_OBJ - } else if (type === 'aiSearchAutocomplete') { + if (type === 'aiSearchAutocomplete') { return AI_SEARCH_AUTOCOMPLETE_PARAMS_OBJ } return GENERAL_SEARCH_PARAMS_OBJ @@ -136,11 +134,6 @@ const AI_SEARCH_AUTOCOMPLETE_PARAMS_OBJ: SearchRequestQueryParams[] = [ { key: 'language', default_: 'en', validate: 
(language: string) => language === 'en' }, ] -const AUTOCOMPLETE_PARAMS_OBJ: SearchRequestQueryParams[] = [ - ...SHARED_AUTOCOMPLETE_PARAMS_OBJ, - { key: 'language', default_: 'en', validate: (language: string) => language in languages }, -] - function toBoolean(value: any): boolean { return value === 'true' || value === '1' } diff --git a/src/search/lib/search-request-params/types.ts b/src/search/lib/search-request-params/types.ts index e9673c7677..5a669925a2 100644 --- a/src/search/lib/search-request-params/types.ts +++ b/src/search/lib/search-request-params/types.ts @@ -33,7 +33,6 @@ export interface ComputedSearchQueryParamsMap { toplevel: string[] aggregate: string[] } - generalAutocomplete: ComputedSearchQueryParams aiSearchAutocomplete: ComputedSearchQueryParams } diff --git a/src/search/middleware/search-routes.ts b/src/search/middleware/search-routes.ts index 2590b48ade..89ceaab265 100644 --- a/src/search/middleware/search-routes.ts +++ b/src/search/middleware/search-routes.ts @@ -13,7 +13,6 @@ import { setFastlySurrogateKey, SURROGATE_ENUMS, } from '@/frame/middleware/set-fastly-surrogate-key.js' -import { getAutocompleteSearchResults } from '@/search/lib/get-elasticsearch-results/general-autocomplete' import { getAISearchAutocompleteResults } from '@/search/lib/get-elasticsearch-results/ai-search-autocomplete' import { getSearchFromRequestParams } from '@/search/lib/search-request-params/get-search-from-request-params' import { getGeneralSearchResults } from '@/search/lib/get-elasticsearch-results/general-search' @@ -59,39 +58,6 @@ router.get( }), ) -router.get( - '/autocomplete/v1', - catchMiddlewareError(async (req: Request, res: Response) => { - const { - indexName, - validationErrors, - searchParams: { query, size, debug }, - } = getSearchFromRequestParams(req, 'generalAutocomplete') - if (validationErrors.length) { - return res.status(400).json(validationErrors[0]) - } - - const options = { - indexName, - query, - size, - debug, - } - try { - const 
{ meta, hits } = await getAutocompleteSearchResults(options) - - if (process.env.NODE_ENV !== 'development') { - searchCacheControl(res) - setFastlySurrogateKey(res, SURROGATE_ENUMS.MANUAL) - } - - res.status(200).json({ meta, hits }) - } catch (error) { - await handleGetSearchResultsError(req, res, error, options) - } - }), -) - router.get( '/ai-search-autocomplete/v1', catchMiddlewareError(async (req: Request, res: Response) => { @@ -161,10 +127,6 @@ router.get('/', (req: Request, res: Response) => { res.redirect(307, req.originalUrl.replace('/search', '/search/v1')) }) -router.get('/autocomplete', (req: Request, res: Response) => { - res.redirect(307, req.originalUrl.replace('/search/autocomplete', '/search/autocomplete/v1')) -}) - router.get('/ai-search-autocomplete', (req: Request, res: Response) => { res.redirect( 307, diff --git a/src/search/scripts/index-test-fixtures.sh b/src/search/scripts/index-test-fixtures.sh index 3091b5e847..d230f61b06 100755 --- a/src/search/scripts/index-test-fixtures.sh +++ b/src/search/scripts/index-test-fixtures.sh @@ -8,8 +8,7 @@ set -e # For general site-search npm run index-general-search -- src/search/tests/fixtures/search-indexes -l en -l ja -V ghec -V fpt --index-prefix tests -# For general autocomplete search -npm run index-general-autocomplete -- src/search/tests/fixtures/data -l en -l ja -v fpt -v ghec --index-prefix tests + # For AI search autocomplete npm run index-ai-search-autocomplete -- src/search/tests/fixtures/data -l en -v fpt -v ghec --index-prefix tests diff --git a/src/search/scripts/index/README.md b/src/search/scripts/index/README.md index a9f9432c0b..aad492e4ee 100644 --- a/src/search/scripts/index/README.md +++ b/src/search/scripts/index/README.md @@ -10,15 +10,14 @@ In production, the indexing happens in the GitHub workflows: `index-autocomplete Before running the indexing for **general search** you run the [scrape](../scrape/README.md) script to scrape page data into files. 
-Before running the indexing for **general autocomplete** and **AI search autocomplete** you need to clone [docs-internal-data](https://github.com/github/docs-internal-data) to the root of this directory. +Before running the indexing for **AI search autocomplete** you need to clone [docs-internal-data](https://github.com/github/docs-internal-data) to the root of this directory. There is a separate run command for indexing each type of search data: 1. **general search**: `npm run index-general-search -- ` -2. **general autocomplete**: `npm run index-general-autocomplete -- docs-internal-data` (if `docs-internal-data` is cloned to root directory) -3. **AI search autocomplete**: `npm run index-ai-search-autocomplete -- docs-internal-data` (if `docs-internal-data` is cloned to root directory) +2. **AI search autocomplete**: `npm run index-ai-search-autocomplete -- docs-internal-data` (if `docs-internal-data` is cloned to root directory) To see the arguments accepted by any script, pass the `--help` argument, for example ```bash -npm run index-general-autocomplete -- --help +npm run index-ai-search-autocomplete -- --help ``` \ No newline at end of file diff --git a/src/search/scripts/index/index-cli.ts b/src/search/scripts/index/index-cli.ts index dc4553d287..18cd552ba9 100644 --- a/src/search/scripts/index/index-cli.ts +++ b/src/search/scripts/index/index-cli.ts @@ -3,7 +3,7 @@ import { errors } from '@elastic/elasticsearch' import dotenv from 'dotenv' import { languageKeys } from '@/languages/lib/languages.js' -import { indexGeneralAutocomplete } from './lib/index-general-autocomplete' + import { indexGeneralSearch } from './lib/index-general-search' import { allIndexVersionKeys, @@ -19,51 +19,6 @@ program.name('index').description('CLI scripts for indexing Docs data into Elast const allVersionKeysWithAll = [...allIndexVersionKeys, 'all'] -const generalAutoCompleteCommand = new Command('general-autocomplete') - .description('Index for general search autocomplete') - 
.addOption( - new Option('-l, --language ', 'Specific languages(s)').choices(languageKeys), - ) - .addOption( - new Option('-v, --version ', 'Specific versions').choices(allVersionKeysWithAll), - ) - .option('--verbose', 'Verbose output') - .option('--index-prefix ', 'Prefix for the index names', '') - .argument('', 'path to the docs-internal-data repo') - .action(async (dataRepoRoot: string, options) => { - const languages = options.language ? options.language : languageKeys - const indexPrefix = options.indexPrefix || '' - if (!Array.isArray(options.version)) { - if (typeof options.version === 'undefined') { - options.version = ['all'] - } else { - options.version = [options.version] - } - } - let versions = options.version - if (!versions.length || versions[0] === 'all') { - versions = supportedAutocompletePlanVersions - } else { - versions = versions.map((version: string) => versionToIndexVersionMap[version]) - } - try { - await indexGeneralAutocomplete({ - dataRepoRoot, - languages, - versions, - indexPrefix, - }) - } catch (error: any) { - if (error instanceof errors.ElasticsearchClientError) { - if ((error as any)?.meta) { - console.error('Error meta: %O', (error as any).meta) - } - } - console.error('general-autocomplete indexing error:', error.message) - process.exit(1) - } - }) - const generalSearchCommand = new Command('general-search') .description( 'Indexes records for general search. 
Records should be pre-scraped by the scrape script.', @@ -179,7 +134,6 @@ const aiSearchAutocompleteCommand = new Command('ai-search-autocomplete') } }) -program.addCommand(generalAutoCompleteCommand) program.addCommand(generalSearchCommand) program.addCommand(aiSearchAutocompleteCommand) diff --git a/src/search/scripts/index/lib/index-general-autocomplete.ts b/src/search/scripts/index/lib/index-general-autocomplete.ts deleted file mode 100644 index 0d2eeb036f..0000000000 --- a/src/search/scripts/index/lib/index-general-autocomplete.ts +++ /dev/null @@ -1,140 +0,0 @@ -import fs from 'node:fs' -import path from 'node:path' - -import { getElasticsearchClient } from '@/search/lib/helpers/get-client' -import { getElasticSearchIndex } from '@/search/lib/elasticsearch-indexes' -import { - createIndex, - populateIndex, - printSuccess, - updateAlias, -} from '@/search/scripts/index/utils/indexing-elasticsearch-utils' -import { getGeneralAutocompleteSettings } from '@/search/scripts/index/utils/settings' -import { generalAutocompleteMappings } from '@/search/scripts/index/utils/mappings' -import { getPlanVersionFromIndexVersion } from '@/search/lib/elasticsearch-versions' - -import type { TermsWithFrequency } from '@/search/scripts/index/types' - -type Options = { - dataRepoRoot: string - languages: string[] - versions: string[] - retries?: number - sleepTime?: number - verbose?: boolean - indexPrefix?: string -} - -export async function indexGeneralAutocomplete(options: Options) { - const client = getElasticsearchClient(undefined, options.verbose) - await client.ping() // Will throw if not available - - console.log( - 'Indexing general autocomplete for languages: %O and versions: %O', - options.languages, - options.versions, - ) - - const { dataRepoRoot, versions, languages } = options - for (const language of languages) { - for (const version of versions) { - const startTime = new Date() - - const records = loadTermsWithFrequency({ version, language, dataRepoRoot }) - 
const { indexName, indexAlias } = getElasticSearchIndex( - 'generalAutocomplete', - version, - language, - options.indexPrefix || '', - ) - - const settings = getGeneralAutocompleteSettings(language, options.verbose) - - await createIndex(client, indexAlias, settings, generalAutocompleteMappings) - - const recordsArray = Object.entries(records).map(([term, popularity]) => ({ - term, - popularity, - })) - - await populateIndex(client, indexAlias, indexName, recordsArray, { - retries: options.retries, - sleepTime: options.sleepTime, - verbose: options.verbose, - }) - - await updateAlias(client, indexName, indexAlias, options) - - printSuccess(indexName, startTime, options.verbose) - } - } -} - -type LoadOptions = { - dataRepoRoot: string - language: string - version: string -} - -/* - * Terms are one-word search terms that a user might enter into a search toolbar - * We have two sources of "terms": - * - Previous user searches (searchTerms) - * - Terms auto-generated taking each word from each title of all of our articles (documentTerms) - * - * Each of the files live in our docs-internal-data repo that should be cloned before running this script. - * The paths to these files for each type of term are: - * - searchTerms: hydro/rollups/user-searches/{langauge}/{version}/rollup.json - * - documentTerms: hydro/rollups/user-searches/{langauge}/{version}/rollup.json - */ -function loadTermsWithFrequency(options: LoadOptions): TermsWithFrequency { - // The {version} in the paths uses the version's 'plan' name, e.g. 
`free-pro-team` instead of `fpt` - const internalDataVersion = getPlanVersionFromIndexVersion(options.version) - - if (!internalDataVersion) { - throw new Error(`No rollup version found for version ${options.version}`) - } - - const filePath = path.join( - options.dataRepoRoot, - 'hydro/rollups/user-searches', - options.language, - internalDataVersion, - 'rollup.json', - ) - const terms: TermsWithFrequency = {} - - const userSearchTerms: TermsWithFrequency = JSON.parse(fs.readFileSync(filePath, 'utf8')) - let maxFrequency = Math.max(...Object.values(userSearchTerms)) - if (maxFrequency === 0) { - throw new Error(`No records found for ${options.language} ${options.version}`) - } - for (const [term, frequency] of Object.entries(userSearchTerms)) { - // Normalize the frequency which will turn into "popularity" in ElasticSearch - // We include +1 here because "userSearchTerms" should have higher priority than "articleTitleTerms" - terms[term] = frequency / maxFrequency + 1 - } - - const articleTitleTermsFilePath = path.join( - options.dataRepoRoot, - 'all-documents/terms', - options.language, - internalDataVersion, - 'terms.json', - ) - const articleTitleTerms: TermsWithFrequency = JSON.parse( - fs.readFileSync(articleTitleTermsFilePath, 'utf8'), - ) - maxFrequency = Math.max(...Object.values(articleTitleTerms)) - if (maxFrequency === 0) { - throw new Error(`No document title records found for ${options.language} ${options.version}`) - } - for (const [articleTitleTerm, frequency] of Object.entries(articleTitleTerms)) { - if (!(articleTitleTerm in terms)) { - // Notice that we don't + 1 here because we want to give more priority to data from user searches - terms[articleTitleTerm] = frequency / maxFrequency - } - } - - return terms -} diff --git a/src/search/scripts/index/utils/mappings.ts b/src/search/scripts/index/utils/mappings.ts index 1bacf528ee..3ffd1aa24d 100644 --- a/src/search/scripts/index/utils/mappings.ts +++ b/src/search/scripts/index/utils/mappings.ts @@ 
-29,17 +29,6 @@ export const generalSearchMappings: estypes.MappingTypeMapping = { }, } -export const generalAutocompleteMappings: estypes.MappingTypeMapping = { - properties: { - term: { - type: 'text', - analyzer: 'text_analyzer', - term_vector: 'with_positions_offsets', - }, - popularity: { type: 'float' }, - }, -} - export const aiSearchAutocompleteMappings: estypes.MappingTypeMapping = { properties: { term: { diff --git a/src/search/scripts/index/utils/settings.ts b/src/search/scripts/index/utils/settings.ts index a2d65ca29f..aec9eda86b 100644 --- a/src/search/scripts/index/utils/settings.ts +++ b/src/search/scripts/index/utils/settings.ts @@ -51,39 +51,6 @@ export function getGeneralSearchSettings( return settings } -export function getGeneralAutocompleteSettings( - language: string, - verbose = false, -): estypes.IndicesIndexSettings { - const settings: estypes.IndicesIndexSettings = { - analysis: { - analyzer: { - text_analyzer: { - filter: ['lowercase'], - tokenizer: 'standard', - type: 'custom', - } as AnalysisCustomAnalyzer, - }, - filter: {}, - }, - } - - const snowballLanguage = SNOWBALL_LANGUAGES[language] - if (snowballLanguage) { - const textAnalyzer = settings.analysis!.analyzer!.text_analyzer as AnalysisCustomAnalyzer - textAnalyzer.filter!.push('languaged_snowball') - - settings.analysis!.filter!['languaged_snowball'] = { - type: 'snowball', - language: snowballLanguage as AnalysisSnowballLanguage, - } - } else if (verbose) { - console.warn(`No snowball language for '${language}'`) - } - - return settings -} - export function getAISearchAutocompleteSettings( language: string, verbose = false, diff --git a/src/search/tests/api-general-autocomplete-search.ts b/src/search/tests/api-general-autocomplete-search.ts deleted file mode 100644 index f3e7120a0b..0000000000 --- a/src/search/tests/api-general-autocomplete-search.ts +++ /dev/null @@ -1,147 +0,0 @@ -/** - * To be able to run these tests you need to index the fixtures! 
- * And you need to have an Elasticsearch URL to connect to for the server. - * - * To index the fixtures, run: - * - * ELASTICSEARCH_URL=http://localhost:9200 npm run index-test-fixtures - * - * This will replace any "real" Elasticsearch indexes you might have so - * once you're done working on vitest tests you need to index real - * content again. - */ - -import { expect, test, vi } from 'vitest' - -import { describeIfElasticsearchURL } from '@/tests/helpers/conditional-runs.js' -import { get } from '@/tests/helpers/e2etest-ts' -import type { AutocompleteSearchResponse, SearchValidationErrorEntry } from '@/search/types' - -if (!process.env.ELASTICSEARCH_URL) { - console.warn( - 'None of the API search middleware tests are run because ' + - "the environment variable 'ELASTICSEARCH_URL' is currently not set.", - ) -} - -// This suite only runs if $ELASTICSEARCH_URL is set. -describeIfElasticsearchURL('search/autocomplete v1 middleware', () => { - vi.setConfig({ testTimeout: 60 * 1000 }) - - test('basic search', async () => { - const sp: URLSearchParams = new URLSearchParams() - // To see why this will work, - // see src/search/tests/fixtures/data - sp.set('query', 'fo') - const res = await get('/api/search/autocomplete/v1?' 
+ sp.toString()) - expect(res.statusCode).toBe(200) - const results: AutocompleteSearchResponse = JSON.parse(res.body) - - expect(results.meta).toBeTruthy() - expect(results.meta.found.value).toBeGreaterThanOrEqual(1) - expect(results.meta.found.relation).toBeTruthy() - - // Might be empty but at least an array - expect(results.hits).toBeTruthy() - // The work "fork" matches "fo" - const hit = results.hits[0] - expect(hit.term).toBe('fork') - expect(hit.highlights).toBeTruthy() - expect(hit.highlights[0]).toBe('fork') - - // Check that it can be cached at the CDN - expect(res.headers['set-cookie']).toBeUndefined() - expect(res.headers['cache-control']).toContain('public') - expect(res.headers['cache-control']).toMatch(/max-age=[1-9]/) - expect(res.headers['surrogate-control']).toContain('public') - expect(res.headers['surrogate-control']).toMatch(/max-age=[1-9]/) - expect(res.headers['surrogate-key']).toBe('manual-purge') - }) - - test('invalid version', async () => { - const sp: URLSearchParams = new URLSearchParams() - sp.set('query', 'fo') - sp.set('version', 'never-heard-of') - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(400) - const errorResponse: SearchValidationErrorEntry = JSON.parse(res.body).error - expect(errorResponse).toBeTruthy() - }) - - test('variations on version name', async () => { - const sp: URLSearchParams = new URLSearchParams() - sp.set('query', 'fo') - - // Test with 'enterprise-cloud' version - sp.set('version', 'enterprise-cloud') - { - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(200) - } - - // Test with 'ghec' version - sp.set('version', 'ghec') - { - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(200) - } - - // Test with 'fpt' version - sp.set('version', 'fpt') - { - const res = await get('/api/search/autocomplete/v1?' 
+ sp.toString()) - expect(res.statusCode).toBe(200) - } - - // Test with 'free-pro-team@latest' version - sp.set('version', 'free-pro-team@latest') - { - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(200) - } - }) - - test('invalid language', async () => { - const sp: URLSearchParams = new URLSearchParams() - sp.set('query', 'fo') - sp.set('language', 'xx') - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(400) - const errorResponse: SearchValidationErrorEntry = JSON.parse(res.body).error - expect(errorResponse).toBeTruthy() - }) - - test('fuzzy autocomplete search', async () => { - const sp: URLSearchParams = new URLSearchParams() - sp.set('query', 'forc') - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(200) - const results: AutocompleteSearchResponse = JSON.parse(res.body) - // The work "fork" matches "forc" - const hit = results.hits[0] - expect(hit.term).toBe('fork') - expect(hit.highlights).toBeTruthy() - expect(hit.highlights[0]).toBe('fork') - }) - - test('invalid query', async () => { - const sp: URLSearchParams = new URLSearchParams() - // No query at all - { - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(400) - } - // Empty query - { - sp.set('query', '') - const res = await get('/api/search/autocomplete/v1?' + sp.toString()) - expect(res.statusCode).toBe(400) - } - // Empty when trimmed - { - sp.set('query', ' ') - const res = await get('/api/search/autocomplete/v1?' 
+ sp.toString()) - expect(res.statusCode).toBe(400) - } - }) -}) diff --git a/src/search/types.ts b/src/search/types.ts index a5fb8d47ef..e9e47c7253 100644 --- a/src/search/types.ts +++ b/src/search/types.ts @@ -5,7 +5,7 @@ import type { ComputedSearchQueryParamsMap, } from '@/search/lib/search-request-params/types' -export type SearchTypes = 'generalSearch' | 'generalAutocomplete' | 'aiSearchAutocomplete' +export type SearchTypes = 'generalSearch' | 'aiSearchAutocomplete' // Responses to API routes export interface GeneralSearchResponse {