
Replace got with fetch in 7 files (Phase 3A/3B) (#57193)

This commit is contained in:
Kevin Heis
2025-08-19 13:55:19 -07:00
committed by GitHub
parent 3195047293
commit a4e37b8cc9
7 changed files with 115 additions and 66 deletions
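At a glance, the pattern repeated across these files is to swap got's chained helpers (.json(), .body, response.statusCode) for a fetchWithRetry call plus an explicit status check and body read. A minimal sketch of that shape, using the three-argument fetchWithRetry(url, init, options) form seen in the diffs below; the helper name getJson and the option values are illustrative only:

import { fetchWithRetry } from '@/frame/lib/fetch-utils'

// Hypothetical helper, for illustration of the migration shape only.
async function getJson<T>(url: string): Promise<T> {
  // Before (got): const data = await got(url, { timeout: { request: 5000 } }).json<T>()
  const response = await fetchWithRetry(url, undefined, { timeout: 5000, retries: 2 })
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${response.statusText}`)
  }
  return (await response.json()) as T
}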

View File

@@ -1,6 +1,5 @@
import { createHmac } from 'crypto'
-import { Agent } from 'node:https'
-import got from 'got'
+import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import { isNil } from 'lodash-es'
import statsd from '@/observability/lib/statsd'
import { report } from '@/observability/lib/failbot'
@@ -15,7 +14,6 @@ const X_HYDRO_APP = 'docs-production'
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
-const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U
const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env
const inProd = NODE_ENV === 'production'
@@ -48,19 +46,27 @@ async function _publish(
})
const token = createHmac('sha256', secret).update(requestBody).digest('hex')
-const response = await got.post(endpoint, {
-body: requestBody,
-agent: { https: httpsAgent },
-headers: {
-Authorization: `Hydro ${token}`,
-'Content-Type': 'application/json',
-'X-Hydro-App': X_HYDRO_APP,
+// Note: Custom HTTPS agent (keepAlive, maxSockets) not supported with native fetch
+// Consider using undici.fetch() if custom agent behavior is critical
+const response = await fetchWithRetry(
+endpoint,
+{
+method: 'POST',
+body: requestBody,
+headers: {
+Authorization: `Hydro ${token}`,
+'Content-Type': 'application/json',
+'X-Hydro-App': X_HYDRO_APP,
+},
+},
-throwHttpErrors: false,
-retry: { limit: RETRIES },
-timeout: { request: TIMEOUT },
-})
-const { statusCode, body } = response
+{
+retries: RETRIES,
+timeout: TIMEOUT,
+throwHttpErrors: false,
+},
+)
+const statusCode = response.status
+const body = await response.text()
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])
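The note above points out that the removed httpsAgent (keepAlive, maxSockets: 32) has no native-fetch equivalent. If connection reuse turns out to matter for Hydro publishing throughput, one possible follow-up, as the comment hints, is undici's fetch with a custom dispatcher. A rough sketch under that assumption; the function name and option values are illustrative and not part of this commit:

import { fetch as undiciFetch, Agent } from 'undici'

// Roughly analogous to the removed new Agent({ keepAlive: true, maxSockets: 32 })
const dispatcher = new Agent({ connections: 32, keepAliveTimeout: 10_000 })

// Hypothetical helper showing how the Hydro POST could keep pooled connections.
export async function postWithKeepAlive(endpoint: string, requestBody: string, token: string) {
  return undiciFetch(endpoint, {
    method: 'POST',
    body: requestBody,
    headers: {
      Authorization: `Hydro ${token}`,
      'Content-Type': 'application/json',
    },
    dispatcher,
  })
}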

View File

@@ -2,12 +2,13 @@
* Utility functions for fetch with retry and timeout functionality
* to replace got library functionality
*/
export interface FetchWithRetryOptions {
retries?: number
retryDelay?: number
timeout?: number
throwHttpErrors?: boolean
// Note: Custom HTTPS agents are not supported in native fetch
// Consider using undici or node-fetch if custom agent support is critical
}
/**

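For reference, a minimal sketch of a helper that satisfies the FetchWithRetryOptions shape above, assuming native fetch and AbortSignal.timeout; the default values and the retry-on-HTTP-error behavior here are assumptions, and the real fetch-utils implementation may differ:

// Illustrative sketch only, not the actual implementation.
async function fetchWithRetrySketch(
  url: string,
  init?: RequestInit,
  options: FetchWithRetryOptions = {},
): Promise<Response> {
  const { retries = 0, retryDelay = 1000, timeout = 30_000, throwHttpErrors = false } = options
  let lastError: unknown
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      // AbortSignal.timeout aborts the request after `timeout` milliseconds
      const response = await fetch(url, { ...init, signal: AbortSignal.timeout(timeout) })
      if (throwHttpErrors && !response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`)
      }
      return response
    } catch (error) {
      lastError = error
      if (attempt < retries) {
        // Exponential backoff between attempts, in the spirit of got's retry delays
        await new Promise((resolve) => setTimeout(resolve, retryDelay * 2 ** attempt))
      }
    }
  }
  throw lastError
}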
View File

@@ -1,49 +1,31 @@
-import got, { type OptionsOfTextResponseBody, type Method } from 'got'
+import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import { Failbot, HTTPBackend } from '@github/failbot'
import { getLoggerContext } from '@/observability/logger/lib/logger-context'
const HAYSTACK_APP = 'docs'
-async function retryingGot(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
+async function retryingFetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
const url = typeof input === 'string' ? input : input.toString()
-// Extract body from fetch init for got options
-const gotOptions: OptionsOfTextResponseBody = {
-method: (init?.method as Method) || 'GET',
-body: typeof init?.body === 'string' ? init.body : undefined,
-headers: init?.headers as Record<string, string> | undefined,
-// With the timeout at 3000 (milliseconds) and the retry.limit
-// at 4 (times), the total worst-case is:
-// 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds
-timeout: {
-response: 3000,
+// Use fetchWithRetry with retry configuration matching got's behavior
+// With the timeout at 3000 (milliseconds) and the retry.limit
+// at 4 (times), the total worst-case is:
+// 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds
+const response = await fetchWithRetry(
+url,
+{
+method: init?.method || 'GET',
+body: init?.body,
+headers: init?.headers,
+},
-retry: {
-// This means it will wait...
-// 1. 1000ms
-// 2. 2000ms
-// 3. 4000ms
-// 4. 8000ms
-// 5. give up!
-//
-// From the documentation:
-//
-// Delays between retries counts with function
-// 1000 * Math.pow(2, retry - 1) + Math.random() * 100,
-// where retry is attempt number (starts from 1).
-//
-limit: 4,
+{
+timeout: 3000,
+retries: 4,
+throwHttpErrors: false, // Let failbot handle HTTP errors
+},
-}
+)
-const gotResponse = await got(url, gotOptions)
-// Convert got response to fetch-compatible Response
-return new Response(gotResponse.body, {
-status: gotResponse.statusCode,
-statusText: gotResponse.statusMessage,
-headers: gotResponse.headers as HeadersInit,
-})
+return response
}
export function report(error: Error, metadata?: Record<string, unknown>) {
@@ -55,7 +37,7 @@ export function report(error: Error, metadata?: Record<string, unknown>) {
const backends = [
new HTTPBackend({
haystackURL: process.env.HAYSTACK_URL,
-fetchFn: retryingGot,
+fetchFn: retryingFetch,
}),
]
const failbot = new Failbot({

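As a sanity check on the worst-case comment above (3000 ms per attempt, 4 retries, roughly 30 seconds total), the numbers can be reproduced directly; a small illustrative snippet, not part of the change:

// got's documented delay between retries is about 1000 * 2^(attempt - 1) ms (plus jitter)
const perAttemptTimeout = 3000
const retries = 4
const delays = Array.from({ length: retries }, (_, i) => 1000 * 2 ** i) // [1000, 2000, 4000, 8000]
const totalDelays = delays.reduce((sum, d) => sum + d, 0) // 15000
const worstCase = perAttemptTimeout * (retries + 1) + totalDelays // 15000 + 15000 = 30000 ms
console.log(`worst case ~${worstCase / 1000} seconds`) // ~30 seconds, as noted above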
View File

@@ -6,7 +6,7 @@ This file & middleware is for when a user requests our /search page e.g. 'docs.g
When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts
*/
-import got from 'got'
+import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import { Request, Response, NextFunction } from 'express'
import { errors } from '@elastic/elasticsearch'
import statsd from '@/observability/lib/statsd'
@@ -172,5 +172,10 @@ async function getProxySearch(
// Add client_name for external API requests
url.searchParams.set('client_name', 'docs.github.com-client')
console.log(`Proxying search to ${url}`)
-return got(url).json<GeneralSearchResponse>()
+const response = await fetchWithRetry(url.toString())
+if (!response.ok) {
+throw new Error(`HTTP ${response.status}: ${response.statusText}`)
+}
+return response.json() as Promise<GeneralSearchResponse>
}

View File

@@ -2,7 +2,6 @@ import eventToPromise from 'event-to-promise'
import chalk from 'chalk'
import dotenv from 'dotenv'
import boxen from 'boxen'
-import { HTTPError } from 'got'
import languages from '@/languages/lib/languages'
import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records'
@@ -12,6 +11,23 @@ import { getAllVersionsKeyFromIndexVersion } from '@/search/lib/elasticsearch-ve
import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types'
+// Custom error class to replace got's HTTPError
+class HTTPError extends Error {
+response: { ok: boolean; statusCode?: number }
+request: { requestUrl?: { pathname?: string } }
+constructor(
+message: string,
+response: { ok: boolean; statusCode?: number },
+request: { requestUrl?: { pathname?: string } },
+) {
+super(message)
+this.name = 'HTTPError'
+this.response = response
+this.request = request
+}
+}
const pageMarker = chalk.green('|')
const recordMarker = chalk.grey('.')
const port = 4002
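Because the scrape code reads err.response.statusCode and err.request.requestUrl.pathname off got's HTTPError, the replacement class keeps those fields so existing handlers do not change. A hypothetical call-site sketch; scrapeOrSkip and the scrape callback are illustrative names, not functions in this repo:

async function scrapeOrSkip(permalink: Permalink, scrape: (p: Permalink) => Promise<void>) {
  try {
    await scrape(permalink)
  } catch (err) {
    if (err instanceof HTTPError) {
      // Same fields got's HTTPError exposed, so log-and-skip logic keeps working
      console.warn(
        `Skipping ${err.request.requestUrl?.pathname ?? 'unknown path'}: HTTP ${err.response.statusCode ?? 'unknown'}`,
      )
      return
    }
    throw err
  }
}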

View File

@@ -1,10 +1,27 @@
import { EventEmitter } from 'events'
import Bottleneck from 'bottleneck'
-import got from 'got'
+import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import cheerio from 'cheerio'
import type { Permalink } from '@/search/scripts/scrape/types'
+// Custom error class to match got's HTTPError interface
+class HTTPError extends Error {
+response: { ok: boolean; statusCode?: number }
+request: { requestUrl?: { pathname?: string } }
+constructor(
+message: string,
+response: { ok: boolean; statusCode?: number },
+request: { requestUrl?: { pathname?: string } },
+) {
+super(message)
+this.name = 'HTTPError'
+this.response = response
+this.request = request
+}
+}
interface DomWaiterOptions {
parseDOM?: boolean
json?: boolean
@@ -45,7 +62,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
if (opts.json) {
try {
-const json = await got(page.url!).json()
+const response = await fetchWithRetry(page.url!)
+if (!response.ok) {
+throw new HTTPError(
+`HTTP ${response.status}: ${response.statusText}`,
+{ ok: response.ok, statusCode: response.status },
+{ requestUrl: { pathname: page.url } },
+)
+}
+const json = await response.json()
const pageCopy = Object.assign({}, page, { json })
emitter.emit('page', pageCopy)
} catch (err) {
@@ -53,7 +78,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
}
} else {
try {
-const body = (await got(page.url!)).body
+const response = await fetchWithRetry(page.url!)
+if (!response.ok) {
+throw new HTTPError(
+`HTTP ${response.status}: ${response.statusText}`,
+{ ok: response.ok, statusCode: response.status },
+{ requestUrl: { pathname: page.url } },
+)
+}
+const body = await response.text()
const pageCopy = Object.assign({}, page, { body })
if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
emitter.emit('page', pageCopy)

View File

@@ -39,7 +39,7 @@ import fs from 'fs'
import path from 'path'
import cheerio from 'cheerio'
-import got from 'got'
+import { fetchWithRetry } from '@/frame/lib/fetch-utils'
interface ReadabilityMetrics {
fleschReadingEase: number
@@ -174,7 +174,12 @@ async function waitForServer(): Promise<void> {
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
-await got(makeURL('/'), { timeout: { request: 5000 } })
+const response = await fetchWithRetry(makeURL('/'), undefined, {
+timeout: 5000,
+})
+if (!response.ok) {
+throw new Error(`HTTP ${response.status}: ${response.statusText}`)
+}
console.log('Server is ready!')
return
} catch (error) {
@@ -202,18 +207,19 @@ async function analyzeFile(filePath: string): Promise<PageReadability | null> {
try {
// Fetch the rendered page
-const response = await got(makeURL(urlPath), {
-timeout: { request: 30000 },
+const response = await fetchWithRetry(makeURL(urlPath), undefined, {
+timeout: 30000,
+throwHttpErrors: false,
})
-if (response.statusCode !== 200) {
-console.warn(`Skipping ${urlPath}: HTTP ${response.statusCode}`)
+if (response.status !== 200) {
+console.warn(`Skipping ${urlPath}: HTTP ${response.status}`)
return null
}
// Parse HTML and extract content
-const $ = cheerio.load(response.body)
+const body = await response.text()
+const $ = cheerio.load(body)
// Get page title
const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'