diff --git a/src/events/lib/hydro.ts b/src/events/lib/hydro.ts index 34242d41c8..3fb1e91e8b 100644 --- a/src/events/lib/hydro.ts +++ b/src/events/lib/hydro.ts @@ -1,6 +1,5 @@ import { createHmac } from 'crypto' -import { Agent } from 'node:https' -import got from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' import { isNil } from 'lodash-es' import statsd from '@/observability/lib/statsd' import { report } from '@/observability/lib/failbot' @@ -15,7 +14,6 @@ const X_HYDRO_APP = 'docs-production' const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal -const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U const { NODE_ENV, HYDRO_SECRET, HYDRO_ENDPOINT } = process.env const inProd = NODE_ENV === 'production' @@ -48,19 +46,27 @@ async function _publish( }) const token = createHmac('sha256', secret).update(requestBody).digest('hex') - const response = await got.post(endpoint, { - body: requestBody, - agent: { https: httpsAgent }, - headers: { - Authorization: `Hydro ${token}`, - 'Content-Type': 'application/json', - 'X-Hydro-App': X_HYDRO_APP, + // Note: Custom HTTPS agent (keepAlive, maxSockets) not supported with native fetch + // Consider using undici.fetch() if custom agent behavior is critical + const response = await fetchWithRetry( + endpoint, + { + method: 'POST', + body: requestBody, + headers: { + Authorization: `Hydro ${token}`, + 'Content-Type': 'application/json', + 'X-Hydro-App': X_HYDRO_APP, + }, }, - throwHttpErrors: false, - retry: { limit: RETRIES }, - timeout: { request: TIMEOUT }, - }) - const { statusCode, body } = response + { + retries: RETRIES, + timeout: TIMEOUT, + throwHttpErrors: false, + }, + ) + const statusCode = response.status + const body = await response.text() statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`]) diff --git a/src/frame/lib/fetch-utils.ts b/src/frame/lib/fetch-utils.ts index c6e6017e7e..f90bc3f6c6 100644 --- a/src/frame/lib/fetch-utils.ts +++ b/src/frame/lib/fetch-utils.ts @@ -2,12 +2,13 @@ * Utility functions for fetch with retry and timeout functionality * to replace got library functionality */ - export interface FetchWithRetryOptions { retries?: number retryDelay?: number timeout?: number throwHttpErrors?: boolean + // Note: Custom HTTPS agents are not supported in native fetch + // Consider using undici or node-fetch if custom agent support is critical } /** diff --git a/src/observability/lib/failbot.ts b/src/observability/lib/failbot.ts index 9ca9b21d57..226def0ae6 100644 --- a/src/observability/lib/failbot.ts +++ b/src/observability/lib/failbot.ts @@ -1,49 +1,31 @@ -import got, { type OptionsOfTextResponseBody, type Method } from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' import { Failbot, HTTPBackend } from '@github/failbot' import { getLoggerContext } from '@/observability/logger/lib/logger-context' const HAYSTACK_APP = 'docs' -async function retryingGot(input: RequestInfo | URL, init?: RequestInit): Promise { +async function retryingFetch(input: RequestInfo | URL, init?: RequestInit): Promise { const url = typeof input === 'string' ? input : input.toString() - // Extract body from fetch init for got options - const gotOptions: OptionsOfTextResponseBody = { - method: (init?.method as Method) || 'GET', - body: typeof init?.body === 'string' ? init.body : undefined, - headers: init?.headers as Record | undefined, - // With the timeout at 3000 (milliseconds) and the retry.limit - // at 4 (times), the total worst-case is: - // 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds - timeout: { - response: 3000, + // Use fetchWithRetry with retry configuration matching got's behavior + // With the timeout at 3000 (milliseconds) and the retry.limit + // at 4 (times), the total worst-case is: + // 3000 * 4 + 1000 + 2000 + 3000 + 4000 + 8000 = 30 seconds + const response = await fetchWithRetry( + url, + { + method: init?.method || 'GET', + body: init?.body, + headers: init?.headers, }, - retry: { - // This means it will wait... - // 1. 1000ms - // 2. 2000ms - // 3. 4000ms - // 4. 8000ms - // 5. give up! - // - // From the documentation: - // - // Delays between retries counts with function - // 1000 * Math.pow(2, retry - 1) + Math.random() * 100, - // where retry is attempt number (starts from 1). - // - limit: 4, + { + timeout: 3000, + retries: 4, + throwHttpErrors: false, // Let failbot handle HTTP errors }, - } + ) - const gotResponse = await got(url, gotOptions) - - // Convert got response to fetch-compatible Response - return new Response(gotResponse.body, { - status: gotResponse.statusCode, - statusText: gotResponse.statusMessage, - headers: gotResponse.headers as HeadersInit, - }) + return response } export function report(error: Error, metadata?: Record) { @@ -55,7 +37,7 @@ export function report(error: Error, metadata?: Record) { const backends = [ new HTTPBackend({ haystackURL: process.env.HAYSTACK_URL, - fetchFn: retryingGot, + fetchFn: retryingFetch, }), ] const failbot = new Failbot({ diff --git a/src/search/middleware/general-search-middleware.ts b/src/search/middleware/general-search-middleware.ts index 3eda5b6ceb..c9533fcae7 100644 --- a/src/search/middleware/general-search-middleware.ts +++ b/src/search/middleware/general-search-middleware.ts @@ -6,7 +6,7 @@ This file & middleware is for when a user requests our /search page e.g. 'docs.g When a user directly hits our API e.g. /api/search/v1?query=foo, they will hit the routes in ./search-routes.ts */ -import got from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' import { Request, Response, NextFunction } from 'express' import { errors } from '@elastic/elasticsearch' import statsd from '@/observability/lib/statsd' @@ -172,5 +172,10 @@ async function getProxySearch( // Add client_name for external API requests url.searchParams.set('client_name', 'docs.github.com-client') console.log(`Proxying search to ${url}`) - return got(url).json() + + const response = await fetchWithRetry(url.toString()) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`) + } + return response.json() as Promise } diff --git a/src/search/scripts/scrape/lib/build-records.ts b/src/search/scripts/scrape/lib/build-records.ts index 9b7e1643e8..479d3a06b0 100644 --- a/src/search/scripts/scrape/lib/build-records.ts +++ b/src/search/scripts/scrape/lib/build-records.ts @@ -2,7 +2,6 @@ import eventToPromise from 'event-to-promise' import chalk from 'chalk' import dotenv from 'dotenv' import boxen from 'boxen' -import { HTTPError } from 'got' import languages from '@/languages/lib/languages' import parsePageSectionsIntoRecords from '@/search/scripts/scrape/lib/parse-page-sections-into-records' @@ -12,6 +11,23 @@ import { getAllVersionsKeyFromIndexVersion } from '@/search/lib/elasticsearch-ve import type { Page, Permalink, Record, Config, Redirects } from '@/search/scripts/scrape/types' +// Custom error class to replace got's HTTPError +class HTTPError extends Error { + response: { ok: boolean; statusCode?: number } + request: { requestUrl?: { pathname?: string } } + + constructor( + message: string, + response: { ok: boolean; statusCode?: number }, + request: { requestUrl?: { pathname?: string } }, + ) { + super(message) + this.name = 'HTTPError' + this.response = response + this.request = request + } +} + const pageMarker = chalk.green('|') const recordMarker = chalk.grey('.') const port = 4002 diff --git a/src/search/scripts/scrape/lib/domwaiter.ts b/src/search/scripts/scrape/lib/domwaiter.ts index fe70a1d9fe..ac8cbe199a 100644 --- a/src/search/scripts/scrape/lib/domwaiter.ts +++ b/src/search/scripts/scrape/lib/domwaiter.ts @@ -1,10 +1,27 @@ import { EventEmitter } from 'events' import Bottleneck from 'bottleneck' -import got from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' import cheerio from 'cheerio' import type { Permalink } from '@/search/scripts/scrape/types' +// Custom error class to match got's HTTPError interface +class HTTPError extends Error { + response: { ok: boolean; statusCode?: number } + request: { requestUrl?: { pathname?: string } } + + constructor( + message: string, + response: { ok: boolean; statusCode?: number }, + request: { requestUrl?: { pathname?: string } }, + ) { + super(message) + this.name = 'HTTPError' + this.response = response + this.request = request + } +} + interface DomWaiterOptions { parseDOM?: boolean json?: boolean @@ -45,7 +62,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp if (opts.json) { try { - const json = await got(page.url!).json() + const response = await fetchWithRetry(page.url!) + if (!response.ok) { + throw new HTTPError( + `HTTP ${response.status}: ${response.statusText}`, + { ok: response.ok, statusCode: response.status }, + { requestUrl: { pathname: page.url } }, + ) + } + const json = await response.json() const pageCopy = Object.assign({}, page, { json }) emitter.emit('page', pageCopy) } catch (err) { @@ -53,7 +78,15 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp } } else { try { - const body = (await got(page.url!)).body + const response = await fetchWithRetry(page.url!) + if (!response.ok) { + throw new HTTPError( + `HTTP ${response.status}: ${response.statusText}`, + { ok: response.ok, statusCode: response.status }, + { requestUrl: { pathname: page.url } }, + ) + } + const body = await response.text() const pageCopy = Object.assign({}, page, { body }) if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body) emitter.emit('page', pageCopy) diff --git a/src/workflows/experimental/readability-report.ts b/src/workflows/experimental/readability-report.ts index 8052748c26..0ca6d72324 100644 --- a/src/workflows/experimental/readability-report.ts +++ b/src/workflows/experimental/readability-report.ts @@ -39,7 +39,7 @@ import fs from 'fs' import path from 'path' import cheerio from 'cheerio' -import got from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' interface ReadabilityMetrics { fleschReadingEase: number @@ -174,7 +174,12 @@ async function waitForServer(): Promise { for (let attempt = 1; attempt <= maxAttempts; attempt++) { try { - await got(makeURL('/'), { timeout: { request: 5000 } }) + const response = await fetchWithRetry(makeURL('/'), undefined, { + timeout: 5000, + }) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${response.statusText}`) + } console.log('Server is ready!') return } catch (error) { @@ -202,18 +207,19 @@ async function analyzeFile(filePath: string): Promise { try { // Fetch the rendered page - const response = await got(makeURL(urlPath), { - timeout: { request: 30000 }, + const response = await fetchWithRetry(makeURL(urlPath), undefined, { + timeout: 30000, throwHttpErrors: false, }) - if (response.statusCode !== 200) { - console.warn(`Skipping ${urlPath}: HTTP ${response.statusCode}`) + if (response.status !== 200) { + console.warn(`Skipping ${urlPath}: HTTP ${response.status}`) return null } // Parse HTML and extract content - const $ = cheerio.load(response.body) + const body = await response.text() + const $ = cheerio.load(body) // Get page title const title = $('h1').first().text().trim() || $('title').text().trim() || 'Untitled'