add search_client to search events when the host is not docs.github.com (#56458)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -371,6 +371,10 @@ const search = {
|
||||
type: 'string',
|
||||
description: 'Any additional search context, such as component searched.',
|
||||
},
|
||||
search_client: {
|
||||
type: 'string',
|
||||
description: 'The client name identifier when the request is not from docs.github.com.',
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -104,6 +104,7 @@ export type EventPropsByType = {
|
||||
[EventType.search]: {
|
||||
search_query: string
|
||||
search_context?: string
|
||||
search_client?: string
|
||||
}
|
||||
[EventType.searchResult]: {
|
||||
search_result_query: string
|
||||
|
||||
@@ -4,6 +4,7 @@ import got from 'got'
|
||||
import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth'
|
||||
import { getCSECopilotSource } from '@/search/lib/helpers/cse-copilot-docs-versions'
|
||||
import type { ExtendedRequest } from '@/types'
|
||||
import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics'
|
||||
|
||||
export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => {
|
||||
const { query, version } = req.body
|
||||
@@ -29,6 +30,15 @@ export const aiSearchProxy = async (req: ExtendedRequest, res: Response) => {
|
||||
return
|
||||
}
|
||||
|
||||
// Handle search analytics and client_name validation
|
||||
const analyticsError = await handleExternalSearchAnalytics(req, 'ai-search')
|
||||
if (analyticsError) {
|
||||
res.status(analyticsError.status).json({
|
||||
errors: [{ message: analyticsError.error }],
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const diagnosticTags = [
|
||||
`version:${version}`.slice(0, 200),
|
||||
`language:${req.language}`.slice(0, 200),
|
||||
|
||||
95
src/search/lib/helpers/external-search-analytics.ts
Normal file
95
src/search/lib/helpers/external-search-analytics.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import { publish } from '@/events/lib/hydro'
|
||||
import { hydroNames } from '@/events/lib/schema'
|
||||
|
||||
/**
 * Publishes a server-side `search` analytics event for requests that arrive on
 * a host other than production or internal staging, and enforces that such
 * requests identify themselves with a `client_name`.
 *
 * The host is read from the `x-host` header, falling back to `host`, with any
 * port removed and compared case-insensitively:
 * - `docs.github.com`, `*.github.net`, `*.githubapp.com`: nothing is published
 *   and no `client_name` is required.
 * - `localhost`: an event is published; `client_name` defaults to `'localhost'`.
 * - anything else (including a request with no host header at all):
 *   `client_name` must be supplied as a non-empty string in the query string
 *   or the request body, otherwise a 400 error object is returned.
 *
 * The search query itself is never sent; it is published as `'REDACTED'`.
 * A failure to publish is logged and does not fail the request.
 *
 * @param req - The incoming request; its `headers`, `query` and `body` are read.
 * @param searchContext - Label published as the event's `search_context`.
 * @returns `null` when the request should continue, or `{ error, status }`
 *   describing the response to send when validation failed.
 */
export async function handleExternalSearchAnalytics(
  req: any,
  searchContext: string,
): Promise<{ error: string; status: number } | null> {
  // A header can be absent (or typed as an array), so never hand `undefined` to stripPort
  const host =
    firstNonEmptyString(req.headers?.['x-host']) ?? firstNonEmptyString(req.headers?.host) ?? ''
  const normalizedHost = stripPort(host).toLowerCase()

  // Skip analytics entirely for production and internal staging environments
  if (shouldBypassClientNameRequirement(normalizedHost)) {
    return null
  }

  // Only a real string is accepted: repeated query params arrive as arrays and
  // JSON bodies can carry any type, but the event schema declares a string.
  let clientName =
    firstNonEmptyString(req.query?.client_name) ?? firstNonEmptyString(req.body?.client_name)

  // For localhost, send analytics but auto-set client_name if not provided
  if (!clientName && normalizedHost === 'localhost') {
    clientName = 'localhost'
  }

  // For all other external requests, require explicit client_name
  if (!clientName) {
    return {
      status: 400,
      error: "Missing required parameter 'client_name' for external requests",
    }
  }

  // Send search event with client identifier
  try {
    await publish({
      schema: hydroNames.search,
      value: {
        type: 'search',
        version: '1.0.0',
        context: {
          event_id: crypto.randomUUID(),
          user: 'server-side',
          version: '1.0.0',
          created: new Date().toISOString(),
          hostname: normalizedHost,
          path: '',
          search: '',
          hash: '',
          path_language: 'en',
          path_version: '',
          path_product: '',
          path_article: '',
        },
        search_query: 'REDACTED',
        search_context: searchContext,
        search_client: clientName,
      },
    })
  } catch (error) {
    // Don't fail the request if analytics fails
    console.error('Failed to send search analytics:', error)
  }

  return null
}

/**
 * Returns `value` when it is a non-empty string, or the first non-empty string
 * element when it is an array; otherwise `undefined`.
 */
function firstNonEmptyString(value: unknown): string | undefined {
  const candidates: unknown[] = Array.isArray(value) ? value : [value]
  return candidates.find((item): item is string => typeof item === 'string' && item !== '')
}
|
||||
|
||||
/**
 * Determines if a host should bypass the client_name requirement for analytics.
 *
 * Returns true if the host (ignoring any port, compared case-insensitively) is
 * exactly `docs.github.com` or ends with `.github.net` or `.githubapp.com`
 * (production and internal staging environments).
 *
 * Note: localhost is NOT included here as it should send analytics with an
 * auto-set client_name.
 *
 * @param host - Host header value, optionally including a port.
 * @returns `false` for a missing or empty host.
 */
export function shouldBypassClientNameRequirement(host: string | undefined): boolean {
  if (!host) return false

  // Host names are case-insensitive, so `Docs.GitHub.com` must match as well
  const normalizedHost = stripPort(host).toLowerCase()
  return (
    normalizedHost === 'docs.github.com' ||
    normalizedHost.endsWith('.github.net') ||
    normalizedHost.endsWith('.githubapp.com')
  )
}
|
||||
|
||||
/**
 * Strips the port number from a host string.
 *
 * A bracketed IPv6 literal (`[::1]:4000`) is returned with its brackets and
 * without the port; its inner colons are not treated as a port separator.
 * Any other value is cut at its first colon.
 *
 * @param host - Host header value such as `localhost:4000` or `docs.github.com`.
 * @returns The host without its port.
 */
function stripPort(host: string): string {
  if (host.startsWith('[')) {
    const closingBracket = host.indexOf(']')
    if (closingBracket !== -1) {
      return host.slice(0, closingBracket + 1)
    }
  }

  const firstColon = host.indexOf(':')
  return firstColon === -1 ? host : host.slice(0, firstColon)
}
|
||||
@@ -3,6 +3,7 @@ import { getAISearchAutocompleteResults } from '@/search/lib/get-elasticsearch-r
|
||||
import { searchCacheControl } from '@/frame/middleware/cache-control'
|
||||
import { SURROGATE_ENUMS, setFastlySurrogateKey } from '@/frame/middleware/set-fastly-surrogate-key'
|
||||
import { handleGetSearchResultsError } from '@/search/middleware/search-routes'
|
||||
import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics'
|
||||
|
||||
import type { Request, Response } from 'express'
|
||||
import type { CombinedSearchResponse, GeneralSearchResponse } from '@/search/types'
|
||||
@@ -35,6 +36,14 @@ export async function combinedSearchRoute(req: Request, res: Response) {
|
||||
return res.status(400).json(combinedValidationErrors[0])
|
||||
}
|
||||
|
||||
// Handle search analytics and client_name validation
|
||||
const analyticsError = await handleExternalSearchAnalytics(req, 'combined-search')
|
||||
if (analyticsError) {
|
||||
return res.status(analyticsError.status).json({
|
||||
error: analyticsError.error,
|
||||
})
|
||||
}
|
||||
|
||||
try {
|
||||
const autocompletePromise = getAISearchAutocompleteResults({
|
||||
indexName: aiIndexName,
|
||||
|
||||
@@ -17,6 +17,7 @@ import { getAISearchAutocompleteResults } from '@/search/lib/get-elasticsearch-r
|
||||
import { getSearchFromRequestParams } from '@/search/lib/search-request-params/get-search-from-request-params'
|
||||
import { getGeneralSearchResults } from '@/search/lib/get-elasticsearch-results/general-search'
|
||||
import { combinedSearchRoute } from '@/search/lib/routes/combined-search-route'
|
||||
import { handleExternalSearchAnalytics } from '@/search/lib/helpers/external-search-analytics'
|
||||
|
||||
const router = express.Router()
|
||||
|
||||
@@ -36,6 +37,14 @@ router.get(
|
||||
return res.status(400).json(validationErrors[0])
|
||||
}
|
||||
|
||||
// Handle search analytics and client_name validation
|
||||
const analyticsError = await handleExternalSearchAnalytics(req, 'general-search')
|
||||
if (analyticsError) {
|
||||
return res.status(analyticsError.status).json({
|
||||
error: analyticsError.error,
|
||||
})
|
||||
}
|
||||
|
||||
const getResultOptions = {
|
||||
indexName,
|
||||
searchParams,
|
||||
|
||||
Reference in New Issue
Block a user