Replace got with fetch in 7 files (Phase 3A/3B) (#57193)
This commit is contained in:
@@ -1,6 +1,5 @@
|
|||||||
import { createHmac } from 'crypto'
|
import { createHmac } from 'crypto'
|
||||||
import { Agent } from 'node:https'
|
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||||
import got from 'got'
|
|
||||||
import { isNil } from 'lodash-es'
|
import { isNil } from 'lodash-es'
|
||||||
import statsd from '@/observability/lib/statsd'
|
import statsd from '@/observability/lib/statsd'
|
||||||
import { report } from '@/observability/lib/failbot'
|
import { report } from '@/observability/lib/failbot'
|
||||||
@@ -15,7 +14,6 @@ const X_HYDRO_APP = 'docs-production'
|
|||||||
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
|
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
|
||||||
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
|
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
|
||||||
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
|
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
|
||||||
const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U
|
|
||||||
const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env
|
const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env
|
||||||
const inProd = NODE_ENV === 'production'
|
const inProd = NODE_ENV === 'production'
|
||||||
|
|
||||||
@@ -48,19 +46,27 @@ async function _publish(
|
|||||||
})
|
})
|
||||||
const token = createHmac('sha256', secret).update(requestBody).digest('hex')
|
const token = createHmac('sha256', secret).update(requestBody).digest('hex')
|
||||||
|
|
||||||
const response = await got.post(endpoint, {
|
// Note: Custom HTTPS agent (keepAlive, maxSockets) not supported with native fetch
|
||||||
body: requestBody,
|
// Consider using undici.fetch() if custom agent behavior is critical
|
||||||
agent: { https: httpsAgent },
|
const response = await fetchWithRetry(
|
||||||
headers: {
|
endpoint,
|
||||||
Authorization: `Hydro ${token}`,
|
{
|
||||||
'Content-Type': 'application/json',
|
method: 'POST',
|
||||||
'X-Hydro-App': X_HYDRO_APP,
|
body: requestBody,
|
||||||
|
headers: {
|
||||||
|
Authorization: `Hydro ${token}`,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Hydro-App': X_HYDRO_APP,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
throwHttpErrors: false,
|
{
|
||||||
retry: { limit: RETRIES },
|
retries: RETRIES,
|
||||||
timeout: { request: TIMEOUT },
|
timeout: TIMEOUT,
|
||||||
})
|
throwHttpErrors: false,
|
||||||
const { statusCode, body } = response
|
},
|
||||||
|
)
|
||||||
|
const statusCode = response.status
|
||||||
|
const body = await response.text()
|
||||||
|
|
||||||
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])
|
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])
|
||||||
|
|
||||||
|
|||||||
@@ -2,12 +2,13 @@
|
|||||||
* Utility functions for fetch with retry and timeout functionality
|
* Utility functions for fetch with retry and timeout functionality
|
||||||
* to replace got library functionality
|
* to replace got library functionality
|
||||||
*/
|
*/
|
||||||
|
|
||||||
export interface FetchWithRetryOptions {
|
export interface FetchWithRetryOptions {
|
||||||
retries?: number
|
retries?: number
|
||||||
retryDelay?: number
|
retryDelay?: number
|
||||||
timeout?: number
|
timeout?: number
|
||||||
throwHttpErrors?: boolean
|
throwHttpErrors?: boolean
|
||||||
|
// Note: Custom HTTPS agents are not supported in native fetch
|
||||||
|
// Consider using undici or node-fetch if custom agent support is critical
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -1,49 +1,31 @@
|
|||||||
import got, { type OptionsOfTextResponseBody, type Method } from 'got'
|
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||||
import { Failbot, HTTPBackend } from '@github/failbot'
|
import { Failbot, HTTPBackend } from '@github/failbot'
|
||||||
import { getLoggerContext } from '@/observability/logger/lib/logger-context'
|
import { getLoggerContext } from '@/observability/logger/lib/logger-context'
|
||||||
|
|
||||||
const HAYSTACK_APP = 'docs'
|
const HAYSTACK_APP = 'docs'
|
||||||
|
|
||||||
async function retryingGot(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
|
async function retryingFetch(input: RequestInfo | URL, init?: RequestInit): Promise<Response> {
|
||||||
const url = typeof input === 'string' ? input : input.toString()
|
const url = typeof input === 'string' ? input : input.toString()
|
||||||
|
|
||||||
// Extract body from fetch init for got options
|
// Use fetchWithRetry with retry configuration matching got's behavior
|
||||||
const gotOptions: OptionsOfTextResponseBody = {
|
// With the timeout at 3000 (milliseconds) and the `retries` option
|
||||||
method: (init?.method as Method) || 'GET',
|
// at 4 (times), the total worst-case is:
|
||||||
body: typeof init?.body === 'string' ? init.body : undefined,
|
// 3000 * 5 + 1000 + 2000 + 4000 + 8000 = 30 seconds
|
||||||
headers: init?.headers as Record<string, string> | undefined,
|
const response = await fetchWithRetry(
|
||||||
// With the timeout at 3000 (milliseconds) and the retry.limit
|
url,
|
||||||
// at 4 (times), the total worst-case is:
|
{
|
||||||
// 3000 * 5 + 1000 + 2000 + 4000 + 8000 = 30 seconds
|
method: init?.method || 'GET',
|
||||||
timeout: {
|
body: init?.body,
|
||||||
response: 3000,
|
headers: init?.headers,
|
||||||
},
|
},
|
||||||
retry: {
|
{
|
||||||
// This means it will wait...
|
timeout: 3000,
|
||||||
// 1. 1000ms
|
retries: 4,
|
||||||
// 2. 2000ms
|
throwHttpErrors: false, // Let failbot handle HTTP errors
|
||||||
// 3. 4000ms
|
|
||||||
// 4. 8000ms
|
|
||||||
// 5. give up!
|
|
||||||
//
|
|
||||||
// From the documentation:
|
|
||||||
//
|
|
||||||
// Delays between retries counts with function
|
|
||||||
// 1000 * Math.pow(2, retry - 1) + Math.random() * 100,
|
|
||||||
// where retry is attempt number (starts from 1).
|
|
||||||
//
|
|
||||||
limit: 4,
|
|
||||||
},
|
},
|
||||||
}
|
)
|
||||||
|
|
||||||
const gotResponse = await got(url, gotOptions)
|
return response
|
||||||
|
|
||||||
// Convert got response to fetch-compatible Response
|
|
||||||
return new Response(gotResponse.body, {
|
|
||||||
status: gotResponse.statusCode,
|
|
||||||
statusText: gotResponse.statusMessage,
|
|
||||||
headers: gotResponse.headers as HeadersInit,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function report(error: Error, metadata?: Record<string, unknown>) {
|
export function report(error: Error, metadata?: Record<string, unknown>) {
|
||||||
@@ -55,7 +37,7 @@ export function report(error: Error, metadata?: Record<string, unknown>) {
|
|||||||
const backends = [
|
const backends = [
|
||||||
new HTTPBackend({
|
new HTTPBackend({
|
||||||
haystackURL: process.env.HAYSTACK_URL,
|
haystackURL: process.env.HAYSTACK_URL,
|
||||||
fetchFn: retryingGot,
|
fetchFn: retryingFetch,
|
||||||
}),
|
}),
|
||||||
]
|
]
|
||||||
const failbot = new Failbot({
|
const failbot = new Failbot({
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ This file & middleware is for when a user requests our /search page e.g. 'docs.g
|
|||||||
When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts
|
When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import got from 'got'
|
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||||
import { Request, Response, NextFunction } from 'express'
|
import { Request, Response, NextFunction } from 'express'
|
||||||
import { errors } from '@elastic/elasticsearch'
|
import { errors } from '@elastic/elasticsearch'
|
||||||
import statsd from '@/observability/lib/statsd'
|
import statsd from '@/observability/lib/statsd'
|
||||||
@@ -172,5 +172,10 @@ async function getProxySearch(
|
|||||||
// Add client_name for external API requests
|
// Add client_name for external API requests
|
||||||
url.searchParams.set('client_name', 'docs.github.com-client')
|
url.searchParams.set('client_name', 'docs.github.com-client')
|
||||||
console.log(`Proxying search to ${url}`)
|
console.log(`Proxying search to ${url}`)
|
||||||
return got(url).json<GeneralSearchResponse>()
|
|
||||||
|
const response = await fetchWithRetry(url.toString())
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
|
||||||
|
}
|
||||||
|
return response.json() as Promise<GeneralSearchResponse>
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import eventToPromise from 'event-to-promise'
|
|||||||
import chalk from 'chalk'
|
import chalk from 'chalk'
|
||||||
import dotenv from 'dotenv'
|
import dotenv from 'dotenv'
|
||||||
import boxen from 'boxen'
|
import boxen from 'boxen'
|
||||||
import { HTTPError } from 'got'
|
|
||||||
|
|
||||||
import languages from '@/languages/lib/languages'
|
import languages from '@/languages/lib/languages'
|
||||||
import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records'
|
import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records'
|
||||||
@@ -12,6 +11,23 @@ import { getAllVersionsKeyFromIndexVersion } from '@/search/lib/elasticsearch-ve
|
|||||||
|
|
||||||
import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types'
|
import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types'
|
||||||
|
|
||||||
|
// Custom error class to replace got's HTTPError
|
||||||
|
class HTTPError extends Error {
|
||||||
|
response: { ok: boolean; statusCode?: number }
|
||||||
|
request: { requestUrl?: { pathname?: string } }
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
message: string,
|
||||||
|
response: { ok: boolean; statusCode?: number },
|
||||||
|
request: { requestUrl?: { pathname?: string } },
|
||||||
|
) {
|
||||||
|
super(message)
|
||||||
|
this.name = 'HTTPError'
|
||||||
|
this.response = response
|
||||||
|
this.request = request
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const pageMarker = chalk.green('|')
|
const pageMarker = chalk.green('|')
|
||||||
const recordMarker = chalk.grey('.')
|
const recordMarker = chalk.grey('.')
|
||||||
const port = 4002
|
const port = 4002
|
||||||
|
|||||||
@@ -1,10 +1,27 @@
|
|||||||
import { EventEmitter } from 'events'
|
import { EventEmitter } from 'events'
|
||||||
import Bottleneck from 'bottleneck'
|
import Bottleneck from 'bottleneck'
|
||||||
import got from 'got'
|
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||||
import cheerio from 'cheerio'
|
import cheerio from 'cheerio'
|
||||||
|
|
||||||
import type { Permalink } from '@/search/scripts/scrape/types'
|
import type { Permalink } from '@/search/scripts/scrape/types'
|
||||||
|
|
||||||
|
// Custom error class to match got's HTTPError interface
|
||||||
|
class HTTPError extends Error {
|
||||||
|
response: { ok: boolean; statusCode?: number }
|
||||||
|
request: { requestUrl?: { pathname?: string } }
|
||||||
|
|
||||||
|
constructor(
|
||||||
|
message: string,
|
||||||
|
response: { ok: boolean; statusCode?: number },
|
||||||
|
request: { requestUrl?: { pathname?: string } },
|
||||||
|
) {
|
||||||
|
super(message)
|
||||||
|
this.name = 'HTTPError'
|
||||||
|
this.response = response
|
||||||
|
this.request = request
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
interface DomWaiterOptions {
|
interface DomWaiterOptions {
|
||||||
parseDOM?: boolean
|
parseDOM?: boolean
|
||||||
json?: boolean
|
json?: boolean
|
||||||
@@ -45,7 +62,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
|
|||||||
|
|
||||||
if (opts.json) {
|
if (opts.json) {
|
||||||
try {
|
try {
|
||||||
const json = await got(page.url!).json()
|
const response = await fetchWithRetry(page.url!)
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new HTTPError(
|
||||||
|
`HTTP ${response.status}: ${response.statusText}`,
|
||||||
|
{ ok: response.ok, statusCode: response.status },
|
||||||
|
{ requestUrl: { pathname: page.url } },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
const json = await response.json()
|
||||||
const pageCopy = Object.assign({}, page, { json })
|
const pageCopy = Object.assign({}, page, { json })
|
||||||
emitter.emit('page', pageCopy)
|
emitter.emit('page', pageCopy)
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -53,7 +78,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
const body = (await got(page.url!)).body
|
const response = await fetchWithRetry(page.url!)
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new HTTPError(
|
||||||
|
`HTTP ${response.status}: ${response.statusText}`,
|
||||||
|
{ ok: response.ok, statusCode: response.status },
|
||||||
|
{ requestUrl: { pathname: page.url } },
|
||||||
|
)
|
||||||
|
}
|
||||||
|
const body = await response.text()
|
||||||
const pageCopy = Object.assign({}, page, { body })
|
const pageCopy = Object.assign({}, page, { body })
|
||||||
if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
|
if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
|
||||||
emitter.emit('page', pageCopy)
|
emitter.emit('page', pageCopy)
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ import fs from 'fs'
|
|||||||
import path from 'path'
|
import path from 'path'
|
||||||
|
|
||||||
import cheerio from 'cheerio'
|
import cheerio from 'cheerio'
|
||||||
import got from 'got'
|
import { fetchWithRetry } from '@/frame/lib/fetch-utils'
|
||||||
|
|
||||||
interface ReadabilityMetrics {
|
interface ReadabilityMetrics {
|
||||||
fleschReadingEase: number
|
fleschReadingEase: number
|
||||||
@@ -174,7 +174,12 @@ async function waitForServer(): Promise<void> {
|
|||||||
|
|
||||||
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
||||||
try {
|
try {
|
||||||
await got(makeURL('/'), { timeout: { request: 5000 } })
|
const response = await fetchWithRetry(makeURL('/'), undefined, {
|
||||||
|
timeout: 5000,
|
||||||
|
})
|
||||||
|
if (!response.ok) {
|
||||||
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`)
|
||||||
|
}
|
||||||
console.log('Server is ready!')
|
console.log('Server is ready!')
|
||||||
return
|
return
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
@@ -202,18 +207,19 @@ async function analyzeFile(filePath: string): Promise<PageReadability | null> {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Fetch the rendered page
|
// Fetch the rendered page
|
||||||
const response = await got(makeURL(urlPath), {
|
const response = await fetchWithRetry(makeURL(urlPath), undefined, {
|
||||||
timeout: { request: 30000 },
|
timeout: 30000,
|
||||||
throwHttpErrors: false,
|
throwHttpErrors: false,
|
||||||
})
|
})
|
||||||
|
|
||||||
if (response.statusCode !== 200) {
|
if (response.status !== 200) {
|
||||||
console.warn(`Skipping ${urlPath}: HTTP ${response.statusCode}`)
|
console.warn(`Skipping ${urlPath}: HTTP ${response.status}`)
|
||||||
return null
|
return null
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse HTML and extract content
|
// Parse HTML and extract content
|
||||||
const $ = cheerio.load(response.body)
|
const body = await response.text()
|
||||||
|
const $ = cheerio.load(body)
|
||||||
|
|
||||||
// Get page title
|
// Get page title
|
||||||
const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'
|
const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'
|
||||||
|
|||||||
Reference in New Issue
Block a user