Replace got with fetch in 7 files (Phase 3A/3B) (#57193)
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
import { createHmac } from 'crypto'
|
||||
import { Agent } from 'node:https'
|
||||
import got from 'got'
|
||||
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||
import { isNil } from 'lodash-es'
|
||||
import statsd from '@/observability/lib/statsd'
|
||||
import { report } from '@/observability/lib/failbot'
|
||||
@@ -15,7 +14,6 @@ const X_HYDRO_APP = 'docs-production'
|
||||
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
|
||||
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
|
||||
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
|
||||
const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U
|
||||
const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env
|
||||
const inProd = NODE_ENV === 'production'
|
||||
|
||||
@@ -48,19 +46,27 @@ async function _publish(
|
||||
})
|
||||
const token = createHmac('sha256', secret).update(requestBody).digest('hex')
|
||||
|
||||
const response = await got.post(endpoint, {
|
||||
body: requestBody,
|
||||
agent: { https: httpsAgent },
|
||||
headers: {
|
||||
Authorization: `Hydro ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
'X-Hydro-App': X_HYDRO_APP,
|
||||
// Note: Custom HTTPS agent (keepAlive, maxSockets) not supported with native fetch
|
||||
// Consider using undici.fetch() if custom agent behavior is critical
|
||||
const response = await fetchWithRetry(
|
||||
endpoint,
|
||||
{
|
||||
method: 'POST',
|
||||
body: requestBody,
|
||||
headers: {
|
||||
Authorization: `Hydro ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
'X-Hydro-App': X_HYDRO_APP,
|
||||
},
|
||||
},
|
||||
throwHttpErrors: false,
|
||||
retry: { limit: RETRIES },
|
||||
timeout: { request: TIMEOUT },
|
||||
})
|
||||
const { statusCode, body } = response
|
||||
{
|
||||
retries: RETRIES,
|
||||
timeout: TIMEOUT,
|
||||
throwHttpErrors: false,
|
||||
},
|
||||
)
|
||||
const statusCode = response.status
|
||||
const body = await response.text()
|
||||
|
||||
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])
|
||||
|
||||
|
||||
@@ -2,12 +2,13 @@
|
||||
* Utility functions for fetch with retry and timeout functionality
|
||||
* to replace got library functionality
|
||||
*/
|
||||
|
||||
/**
 * Options accepted by `fetchWithRetry` in addition to the standard fetch
 * `RequestInit`. Every field is optional.
 *
 * NOTE(review): defaults are applied by the implementation, which is not
 * visible here — confirm them there before relying on an omitted field.
 */
export interface FetchWithRetryOptions {
|
||||
// Retry count. Callers in this change pass 0 (no retries) and 4.
// Presumably counts retries after the first attempt — TODO confirm.
retries?: number
|
||||
// Delay between retries. Unit and back-off shape are not visible here;
// presumably milliseconds — TODO confirm against the implementation.
retryDelay?: number
|
||||
// Request timeout. Callers in this change pass 3000, 5000 and 30000, and
// one nearby comment describes 3000 as milliseconds.
timeout?: number
|
||||
// Callers that pass `false` go on to inspect `response.status` themselves,
// so `false` presumably means "do not throw on non-2xx" — TODO confirm.
throwHttpErrors?: boolean
|
||||
// Note: Custom HTTPS agents are not supported in native fetch
|
||||
// Consider using undici or node-fetch if custom agent support is critical
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,49 +1,31 @@
|
||||
import got, { type OptionsOfTextResponseBody, type Method } from 'got'
|
||||
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||
import { Failbot, HTTPBackend } from '@github/failbot'
|
||||
import { getLoggerContext } from '@/observability/logger/lib/logger-context'
|
||||
|
||||
const HAYSTACK_APP = 'docs'
|
||||
|
||||
async function retryingGot(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
|
||||
async function retryingFetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
|
||||
const url = typeof input === 'string' ? input : input.toString()
|
||||
|
||||
// Extract body from fetch init for got options
|
||||
const gotOptions: OptionsOfTextResponseBody = {
|
||||
method: (init?.method as Method) || 'GET',
|
||||
body: typeof init?.body === 'string' ? init.body : undefined,
|
||||
headers: init?.headers as Record<string, string> | undefined,
|
||||
// With the timeout at 3000 (milliseconds) and the retry.limit
|
||||
// at 4 (times), the total worst-case is:
|
||||
// 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds
|
||||
timeout: {
|
||||
response: 3000,
|
||||
// Use fetchWithRetry with retry configuration matching got's behavior
|
||||
// With the timeout at 3000 (milliseconds) and the retry.limit
|
||||
// at 4 (times), the total worst-case is:
|
||||
// 3000 * 5 (initial attempt + 4 retries) + 1000 + 2000 + 4000 + 8000 = 30 seconds
|
||||
const response = await fetchWithRetry(
|
||||
url,
|
||||
{
|
||||
method: init?.method || 'GET',
|
||||
body: init?.body,
|
||||
headers: init?.headers,
|
||||
},
|
||||
retry: {
|
||||
// This means it will wait...
|
||||
// 1. 1000ms
|
||||
// 2. 2000ms
|
||||
// 3. 4000ms
|
||||
// 4. 8000ms
|
||||
// 5. give up!
|
||||
//
|
||||
// From the documentation:
|
||||
//
|
||||
// Delays between retries counts with function
|
||||
// 1000 * Math.pow(2, retry - 1) + Math.random() * 100,
|
||||
// where retry is attempt number (starts from 1).
|
||||
//
|
||||
limit: 4,
|
||||
{
|
||||
timeout: 3000,
|
||||
retries: 4,
|
||||
throwHttpErrors: false, // Let failbot handle HTTP errors
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
const gotResponse = await got(url, gotOptions)
|
||||
|
||||
// Convert got response to fetch-compatible Response
|
||||
return new Response(gotResponse.body, {
|
||||
status: gotResponse.statusCode,
|
||||
statusText: gotResponse.statusMessage,
|
||||
headers: gotResponse.headers as HeadersInit,
|
||||
})
|
||||
return response
|
||||
}
|
||||
|
||||
export function report(error: Error, metadata?: Record<string, unknown>) {
|
||||
@@ -55,7 +37,7 @@ export function report(error: Error, metadata?: Record<string, unknown>) {
|
||||
const backends = [
|
||||
new HTTPBackend({
|
||||
haystackURL: process.env.HAYSTACK_URL,
|
||||
fetchFn: retryingGot,
|
||||
fetchFn: retryingFetch,
|
||||
}),
|
||||
]
|
||||
const failbot = new Failbot({
|
||||
|
||||
@@ -6,7 +6,7 @@ This file & middleware is for when a user requests our /search page e.g. 'docs.g
|
||||
When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts
|
||||
*/
|
||||
|
||||
import got from 'got'
|
||||
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||
import { Request, Response, NextFunction } from 'express'
|
||||
import { errors } from '@elastic/elasticsearch'
|
||||
import statsd from '@/observability/lib/statsd'
|
||||
@@ -172,5 +172,10 @@ async function getProxySearch(
|
||||
// Add client_name for external API requests
|
||||
url.searchParams.set('client_name', 'docs.github.com-client')
|
||||
console.log(`Proxying search to ${url}`)
|
||||
return got(url).json<GeneralSearchResponse>()
|
||||
|
||||
const response = await fetchWithRetry(url.toString())
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
|
||||
}
|
||||
return response.json() as Promise<GeneralSearchResponse>
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ import eventToPromise from 'event-to-promise'
|
||||
import chalk from 'chalk'
|
||||
import dotenv from 'dotenv'
|
||||
import boxen from 'boxen'
|
||||
import { HTTPError } from 'got'
|
||||
|
||||
import languages from '@/languages/lib/languages'
|
||||
import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records'
|
||||
@@ -12,6 +11,23 @@ import { getAllVersionsKeyFromIndexVersion } from '@/search/lib/elasticsearch-ve
|
||||
|
||||
import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types'
|
||||
|
||||
// Custom error class to replace got's HTTPError
|
||||
/**
 * Local stand-in for the `HTTPError` class that this file previously
 * imported from `got`.
 *
 * Extends `Error` and carries two plain-object fields, `response` and
 * `request`, exactly as supplied to the constructor.
 *
 * NOTE(review): another file in this change declares its own class with the
 * same name and shape. They are distinct classes, so an `instanceof` check
 * against this one will not match errors constructed from the other —
 * confirm no caller relies on that.
 */
class HTTPError extends Error {
|
||||
// Response summary: an `ok` flag plus an optional numeric status code.
response: { ok: boolean; statusCode?: number }
|
||||
// Request descriptor; both `requestUrl` and its `pathname` are optional.
request: { requestUrl?: { pathname?: string } }
|
||||
|
||||
/**
 * @param message  Human-readable message, forwarded to `Error`.
 * @param response Stored as-is on `this.response`.
 * @param request  Stored as-is on `this.request`.
 */
constructor(
|
||||
message: string,
|
||||
response: { ok: boolean; statusCode?: number },
|
||||
request: { requestUrl?: { pathname?: string } },
|
||||
) {
|
||||
super(message)
|
||||
// Replace the `name` inherited from Error so the error reports as HTTPError.
this.name = 'HTTPError'
|
||||
this.response = response
|
||||
this.request = request
|
||||
}
|
||||
}
|
||||
|
||||
const pageMarker = chalk.green('|')
|
||||
const recordMarker = chalk.grey('.')
|
||||
const port = 4002
|
||||
|
||||
@@ -1,10 +1,27 @@
|
||||
import { EventEmitter } from 'events'
|
||||
import Bottleneck from 'bottleneck'
|
||||
import got from 'got'
|
||||
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||
import cheerio from 'cheerio'
|
||||
|
||||
import type { Permalink } from '@/search/scripts/scrape/types'
|
||||
|
||||
// Custom error class to match got's HTTPError interface
|
||||
/**
 * Error thrown by this module when a fetched page responds with a non-OK
 * status. Shaped after got's `HTTPError`, with `response` and `request`
 * fields.
 *
 * NOTE(review): the throw sites visible in this file pass `page.url` as
 * `request.requestUrl.pathname`. If `page.url` is a full URL rather than a
 * path, consumers reading `pathname` will see the whole URL — confirm.
 */
class HTTPError extends Error {
|
||||
// Response summary: an `ok` flag plus an optional numeric status code.
response: { ok: boolean; statusCode?: number }
|
||||
// Request descriptor; both `requestUrl` and its `pathname` are optional.
request: { requestUrl?: { pathname?: string } }
|
||||
|
||||
/**
 * @param message  Human-readable message, forwarded to `Error`.
 * @param response Stored as-is on `this.response`.
 * @param request  Stored as-is on `this.request`.
 */
constructor(
|
||||
message: string,
|
||||
response: { ok: boolean; statusCode?: number },
|
||||
request: { requestUrl?: { pathname?: string } },
|
||||
) {
|
||||
super(message)
|
||||
// Replace the `name` inherited from Error so the error reports as HTTPError.
this.name = 'HTTPError'
|
||||
this.response = response
|
||||
this.request = request
|
||||
}
|
||||
}
|
||||
|
||||
interface DomWaiterOptions {
|
||||
parseDOM?: boolean
|
||||
json?: boolean
|
||||
@@ -45,7 +62,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
|
||||
|
||||
if (opts.json) {
|
||||
try {
|
||||
const json = await got(page.url!).json()
|
||||
const response = await fetchWithRetry(page.url!)
|
||||
if (!response.ok) {
|
||||
throw new HTTPError(
|
||||
`HTTP ${response.status}: ${response.statusText}`,
|
||||
{ ok: response.ok, statusCode: response.status },
|
||||
{ requestUrl: { pathname: page.url } },
|
||||
)
|
||||
}
|
||||
const json = await response.json()
|
||||
const pageCopy = Object.assign({}, page, { json })
|
||||
emitter.emit('page', pageCopy)
|
||||
} catch (err) {
|
||||
@@ -53,7 +78,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
const body = (await got(page.url!)).body
|
||||
const response = await fetchWithRetry(page.url!)
|
||||
if (!response.ok) {
|
||||
throw new HTTPError(
|
||||
`HTTP ${response.status}: ${response.statusText}`,
|
||||
{ ok: response.ok, statusCode: response.status },
|
||||
{ requestUrl: { pathname: page.url } },
|
||||
)
|
||||
}
|
||||
const body = await response.text()
|
||||
const pageCopy = Object.assign({}, page, { body })
|
||||
if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
|
||||
emitter.emit('page', pageCopy)
|
||||
|
||||
@@ -39,7 +39,7 @@ import fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
import cheerio from 'cheerio'
|
||||
import got from 'got'
|
||||
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||
|
||||
interface ReadabilityMetrics {
|
||||
fleschReadingEase: number
|
||||
@@ -174,7 +174,12 @@ async function waitForServer(): Promise<void> {
|
||||
|
||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||
try {
|
||||
await got(makeURL('/'), { timeout: { request: 5000 } })
|
||||
const response = await fetchWithRetry(makeURL('/'), undefined, {
|
||||
timeout: 5000,
|
||||
})
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
|
||||
}
|
||||
console.log('Server is ready!')
|
||||
return
|
||||
} catch (error) {
|
||||
@@ -202,18 +207,19 @@ async function analyzeFile(filePath: string): Promise<PageReadability | null> {
|
||||
|
||||
try {
|
||||
// Fetch the rendered page
|
||||
const response = await got(makeURL(urlPath), {
|
||||
timeout: { request: 30000 },
|
||||
const response = await fetchWithRetry(makeURL(urlPath), undefined, {
|
||||
timeout: 30000,
|
||||
throwHttpErrors: false,
|
||||
})
|
||||
|
||||
if (response.statusCode !== 200) {
|
||||
console.warn(`Skipping ${urlPath}: HTTP ${response.statusCode}`)
|
||||
if (response.status !== 200) {
|
||||
console.warn(`Skipping ${urlPath}: HTTP ${response.status}`)
|
||||
return null
|
||||
}
|
||||
|
||||
// Parse HTML and extract content
|
||||
const $ = cheerio.load(response.body)
|
||||
const body = await response.text()
|
||||
const $ = cheerio.load(body)
|
||||
|
||||
// Get page title
|
||||
const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'
|
||||
|
||||
Reference in New Issue
Block a user