1
0
mirror of synced 2025-12-22 11:26:57 -05:00

Replace got with fetch in 7 files (Phase 3A/3B) (#57193)

This commit is contained in:
Kevin Heis
2025-08-19 13:55:19 -07:00
committed by GitHub
parent 3195047293
commit a4e37b8cc9
7 changed files with 115 additions and 66 deletions

View File

@@ -1,6 +1,5 @@
import { createHmac } from 'crypto' import { createHmac } from 'crypto'
import { Agent } from 'node:https' import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import got from 'got'
import { isNil } from 'lodash-es' import { isNil } from 'lodash-es'
import statsd from '@/observability/lib/statsd' import statsd from '@/observability/lib/statsd'
import { report } from '@/observability/lib/failbot' import { report } from '@/observability/lib/failbot'
@@ -15,7 +14,6 @@ const X_HYDRO_APP = 'docs-production'
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U
const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env
const inProd = NODE_ENV === 'production' const inProd = NODE_ENV === 'production'
@@ -48,19 +46,27 @@ async function _publish(
}) })
const token = createHmac('sha256', secret).update(requestBody).digest('hex') const token = createHmac('sha256', secret).update(requestBody).digest('hex')
const response = await got.post(endpoint, { // Note: Custom HTTPS agent (keepAlive, maxSockets) not supported with native fetch
body: requestBody, // Consider using undici.fetch() if custom agent behavior is critical
agent: { https: httpsAgent }, const response = await fetchWithRetry(
headers: { endpoint,
Authorization: `Hydro ${token}`, {
'Content-Type': 'application/json', method: 'POST',
'X-Hydro-App': X_HYDRO_APP, body: requestBody,
headers: {
Authorization: `Hydro ${token}`,
'Content-Type': 'application/json',
'X-Hydro-App': X_HYDRO_APP,
},
}, },
throwHttpErrors: false, {
retry: { limit: RETRIES }, retries: RETRIES,
timeout: { request: TIMEOUT }, timeout: TIMEOUT,
}) throwHttpErrors: false,
const { statusCode, body } = response },
)
const statusCode = response.status
const body = await response.text()
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`]) statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])

View File

@@ -2,12 +2,13 @@
* Utility functions for fetch with retry and timeout functionality * Utility functions for fetch with retry and timeout functionality
* to replace got library functionality * to replace got library functionality
*/ */
export interface FetchWithRetryOptions { export interface FetchWithRetryOptions {
retries?: number retries?: number
retryDelay?: number retryDelay?: number
timeout?: number timeout?: number
throwHttpErrors?: boolean throwHttpErrors?: boolean
// Note: Custom HTTPS agents are not supported in native fetch
// Consider using undici or node-fetch if custom agent support is critical
} }
/** /**

View File

@@ -1,49 +1,31 @@
import got, { type OptionsOfTextResponseBody, type Method } from 'got' import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import { Failbot, HTTPBackend } from '@github/failbot' import { Failbot, HTTPBackend } from '@github/failbot'
import { getLoggerContext } from '@/observability/logger/lib/logger-context' import { getLoggerContext } from '@/observability/logger/lib/logger-context'
const HAYSTACK_APP = 'docs' const HAYSTACK_APP = 'docs'
async function retryingGot(input: RequestInfo | URL, init?: RequestInit): Promise<Response> { async function retryingFetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
const url = typeof input === 'string' ? input : input.toString() const url = typeof input === 'string' ? input : input.toString()
// Extract body from fetch init for got options // Use fetchWithRetry with retry configuration matching got's behavior
const gotOptions: OptionsOfTextResponseBody = { // With the timeout at 3000 (milliseconds) and the retry.limit
method: (init?.method as Method) || 'GET', // at 4 (times), the total worst-case is:
body: typeof init?.body === 'string' ? init.body : undefined, // 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds
headers: init?.headers as Record<string, string> | undefined, const response = await fetchWithRetry(
// With the timeout at 3000 (milliseconds) and the retry.limit url,
// at 4 (times), the total worst-case is: {
// 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds method: init?.method || 'GET',
timeout: { body: init?.body,
response: 3000, headers: init?.headers,
}, },
retry: { {
// This means it will wait... timeout: 3000,
// 1. 1000ms retries: 4,
// 2. 2000ms throwHttpErrors: false, // Let failbot handle HTTP errors
// 3. 4000ms
// 4. 8000ms
// 5. give up!
//
// From the documentation:
//
// Delays between retries counts with function
// 1000 * Math.pow(2, retry - 1) + Math.random() * 100,
// where retry is attempt number (starts from 1).
//
limit: 4,
}, },
} )
const gotResponse = await got(url, gotOptions) return response
// Convert got response to fetch-compatible Response
return new Response(gotResponse.body, {
status: gotResponse.statusCode,
statusText: gotResponse.statusMessage,
headers: gotResponse.headers as HeadersInit,
})
} }
export function report(error: Error, metadata?: Record<string, unknown>) { export function report(error: Error, metadata?: Record<string, unknown>) {
@@ -55,7 +37,7 @@ export function report(error: Error, metadata?: Record<string, unknown>) {
const backends = [ const backends = [
new HTTPBackend({ new HTTPBackend({
haystackURL: process.env.HAYSTACK_URL, haystackURL: process.env.HAYSTACK_URL,
fetchFn: retryingGot, fetchFn: retryingFetch,
}), }),
] ]
const failbot = new Failbot({ const failbot = new Failbot({

View File

@@ -6,7 +6,7 @@ This file & middleware is for when a user requests our /search page e.g. 'docs.g
When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts
*/ */
import got from 'got' import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import { Request, Response, NextFunction } from 'express' import { Request, Response, NextFunction } from 'express'
import { errors } from '@elastic/elasticsearch' import { errors } from '@elastic/elasticsearch'
import statsd from '@/observability/lib/statsd' import statsd from '@/observability/lib/statsd'
@@ -172,5 +172,10 @@ async function getProxySearch(
// Add client_name for external API requests // Add client_name for external API requests
url.searchParams.set('client_name', 'docs.github.com-client') url.searchParams.set('client_name', 'docs.github.com-client')
console.log(`Proxying search to ${url}`) console.log(`Proxying search to ${url}`)
return got(url).json<GeneralSearchResponse>()
const response = await fetchWithRetry(url.toString())
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
return response.json() as Promise<GeneralSearchResponse>
} }

View File

@@ -2,7 +2,6 @@ import eventToPromise from 'event-to-promise'
import chalk from 'chalk' import chalk from 'chalk'
import dotenv from 'dotenv' import dotenv from 'dotenv'
import boxen from 'boxen' import boxen from 'boxen'
import { HTTPError } from 'got'
import languages from '@/languages/lib/languages' import languages from '@/languages/lib/languages'
import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records' import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records'
@@ -12,6 +11,23 @@ import { getAllVersionsKeyFromIndexVersion } from '@/search/lib/elasticsearch-ve
import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types' import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types'
/**
 * Local stand-in for the `HTTPError` class that used to be imported from `got`.
 *
 * Besides the usual `Error` message it carries two small records:
 * - `response`: an `ok` flag plus an optional numeric `statusCode`
 * - `request`: an optional `requestUrl` object with an optional `pathname`
 *
 * NOTE(review): how this file consumes these fields is outside the visible
 * hunk; the shape presumably mirrors what the old got-based handling read.
 */
class HTTPError extends Error {
  response: { ok: boolean; statusCode?: number }
  request: { requestUrl?: { pathname?: string } }

  /**
   * @param message  Human-readable description, forwarded to `Error`.
   * @param response Outcome of the HTTP call (`ok`, optional `statusCode`).
   * @param request  Description of the request (optional `requestUrl.pathname`).
   */
  constructor(message: string, response: HTTPError['response'], request: HTTPError['request']) {
    super(message)
    // Override the default "Error" name so logs and checks can tell this type apart.
    this.name = 'HTTPError'
    this.response = response
    this.request = request
  }
}
const pageMarker = chalk.green('|') const pageMarker = chalk.green('|')
const recordMarker = chalk.grey('.') const recordMarker = chalk.grey('.')
const port = 4002 const port = 4002

View File

@@ -1,10 +1,27 @@
import { EventEmitter } from 'events' import { EventEmitter } from 'events'
import Bottleneck from 'bottleneck' import Bottleneck from 'bottleneck'
import got from 'got' import { fetchWithRetry } from '@/frame/lib/fetch-utils'
import cheerio from 'cheerio' import cheerio from 'cheerio'
import type { Permalink } from '@/search/scripts/scrape/types' import type { Permalink } from '@/search/scripts/scrape/types'
/**
 * Error type shaped like the subset of got's `HTTPError` interface used here.
 *
 * `getPage` throws it when a fetched response is not `ok`, passing the
 * response's `ok`/`status` as `response` and the page URL as
 * `request.requestUrl.pathname`.
 */
class HTTPError extends Error {
  response: { ok: boolean; statusCode?: number }
  request: { requestUrl?: { pathname?: string } }

  /**
   * @param message  Text forwarded to the base `Error`.
   * @param response Result summary: `ok` flag and optional `statusCode`.
   * @param request  Request summary: optional `requestUrl.pathname`.
   */
  constructor(message: string, response: HTTPError['response'], request: HTTPError['request']) {
    super(message)
    // Replace the inherited "Error" name so this type is identifiable by name.
    this.name = 'HTTPError'
    this.response = response
    this.request = request
  }
}
interface DomWaiterOptions { interface DomWaiterOptions {
parseDOM?: boolean parseDOM?: boolean
json?: boolean json?: boolean
@@ -45,7 +62,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
if (opts.json) { if (opts.json) {
try { try {
const json = await got(page.url!).json() const response = await fetchWithRetry(page.url!)
if (!response.ok) {
throw new HTTPError(
`HTTP ${response.status}: ${response.statusText}`,
{ ok: response.ok, statusCode: response.status },
{ requestUrl: { pathname: page.url } },
)
}
const json = await response.json()
const pageCopy = Object.assign({}, page, { json }) const pageCopy = Object.assign({}, page, { json })
emitter.emit('page', pageCopy) emitter.emit('page', pageCopy)
} catch (err) { } catch (err) {
@@ -53,7 +78,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
} }
} else { } else {
try { try {
const body = (await got(page.url!)).body const response = await fetchWithRetry(page.url!)
if (!response.ok) {
throw new HTTPError(
`HTTP ${response.status}: ${response.statusText}`,
{ ok: response.ok, statusCode: response.status },
{ requestUrl: { pathname: page.url } },
)
}
const body = await response.text()
const pageCopy = Object.assign({}, page, { body }) const pageCopy = Object.assign({}, page, { body })
if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body) if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
emitter.emit('page', pageCopy) emitter.emit('page', pageCopy)

View File

@@ -39,7 +39,7 @@ import fs from 'fs'
import path from 'path' import path from 'path'
import cheerio from 'cheerio' import cheerio from 'cheerio'
import got from 'got' import { fetchWithRetry } from '@/frame/lib/fetch-utils'
interface ReadabilityMetrics { interface ReadabilityMetrics {
fleschReadingEase: number fleschReadingEase: number
@@ -174,7 +174,12 @@ async function waitForServer(): Promise<void> {
for (let attempt = 1; attempt <= maxAttempts; attempt++) { for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try { try {
await got(makeURL('/'), { timeout: { request: 5000 } }) const response = await fetchWithRetry(makeURL('/'), undefined, {
timeout: 5000,
})
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
}
console.log('Server is ready!') console.log('Server is ready!')
return return
} catch (error) { } catch (error) {
@@ -202,18 +207,19 @@ async function analyzeFile(filePath: string): Promise<PageReadability | null> {
try { try {
// Fetch the rendered page // Fetch the rendered page
const response = await got(makeURL(urlPath), { const response = await fetchWithRetry(makeURL(urlPath), undefined, {
timeout: { request: 30000 }, timeout: 30000,
throwHttpErrors: false, throwHttpErrors: false,
}) })
if (response.statusCode !== 200) { if (response.status !== 200) {
console.warn(`Skipping ${urlPath}: HTTP ${response.statusCode}`) console.warn(`Skipping ${urlPath}: HTTP ${response.status}`)
return null return null
} }
// Parse HTML and extract content // Parse HTML and extract content
const $ = cheerio.load(response.body) const body = await response.text()
const $ = cheerio.load(body)
// Get page title // Get page title
const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled' const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'