1
0
mirror of synced 2025-12-19 18:10:59 -05:00
Files
docs/src/search/components/helpers/ai-search-links-json.ts
Evan Bonsignori b099e4a9e3 Ai search UI (#53026)
Co-authored-by: Kevin Heis <heiskr@users.noreply.github.com>
Co-authored-by: Ashish Keshan <ashishkeshan@github.com>
2025-02-05 19:46:58 +00:00

77 lines
2.1 KiB
TypeScript

// A single link entry reported in the AI search analytics payload.
type LinksJSONEntry = {
  // 'inline' links appear in the AI response text; 'reference' links are the
  // sources used to generate the response.
  type: 'reference' | 'inline'
  // The link target.
  url: string
  // Product name derived from the link's path ('' when none is found).
  product: string
}
// The full list of link entries that gets serialized to a JSON string.
type LinksJSON = LinksJSONEntry[]
// Builds a JSON string describing every link associated with an AI response:
// 1. 'inline' links: markdown links found in the AI response text
// 2. 'reference' links: sources used to generate the AI response via an embedding
//
// The JSON string is attached to our analytics events so we can see the
// most popular sourced references, among other things.
//
// Inline entries are listed first, followed by reference entries; each entry
// carries the product name derived from its URL.
export function generateAiSearchLinksJson(
  sourcesBuffer: Array<{ url: string }>,
  aiResponse: string,
): string {
  const inlineEntries: LinksJSON = extractMarkdownLinks(aiResponse).map((url) => ({
    type: 'inline',
    url,
    product: extractProductFromDocsUrl(url),
  }))
  const referenceEntries: LinksJSON = sourcesBuffer.map(({ url }) => ({
    type: 'reference',
    url,
    product: extractProductFromDocsUrl(url),
  }))
  return JSON.stringify([...inlineEntries, ...referenceEntries])
}
// Get the destination of every markdown link of the form [text](url) in a text.
// Returns the destinations in the order they appear; an empty array if there are none.
function extractMarkdownLinks(markdownResponse: string): string[] {
  // This regex matches markdown links of the form [text](url)
  // Explanation:
  // \[([^\]]+)\] : Matches the link text inside square brackets (one or more non-']' characters).
  // \( : Matches the opening parenthesis.
  // ([^)]+) : Captures the link target (one or more characters that are not a closing parenthesis).
  // \) : Matches the closing parenthesis.
  const regex = /\[([^\]]+)\]\(([^)]+)\)/g
  const urls: string[] = []
  let match: RegExpExecArray | null
  while ((match = regex.exec(markdownResponse)) !== null) {
    // The captured target may carry an optional title, e.g. [text](url "title").
    // Only the first whitespace-delimited token is the destination.
    const destination = (match[2] ?? '').trim().split(/\s+/)[0]
    // Skip targets that were whitespace only; they are not usable URLs.
    if (destination) {
      urls.push(destination)
    }
  }
  return urls
}
// Given a Docs URL, extract the product name.
// Accepts absolute URLs as well as relative links such as "/en/actions/...".
// Returns '' when the URL cannot be parsed or its path has no product segment.
function extractProductFromDocsUrl(url: string): string {
  let pathname: string
  try {
    // The base is only used to resolve relative links; absolute URLs ignore it.
    // Only the pathname is read, so the base's host does not affect the result.
    pathname = new URL(url, 'https://docs.github.com').pathname
  } catch {
    // Malformed link: report no product rather than throwing.
    return ''
  }
  const [first, second] = pathname.split('/').filter((segment) => segment)
  if (first === undefined) {
    return ''
  }
  // If the first segment is a language code (2 characters), then product is the next segment.
  // Otherwise, assume the first segment is the product.
  return first.length === 2 ? (second ?? '') : first
}