
Fix Elasticsearch indexing flakiness by reducing concurrency (#57998)

Author: Kevin Heis
Committed: 2025-10-15 11:54:04 -07:00 (by GitHub)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
parent 11d69f35b4
commit 7e4d07cce6
2 changed files with 12 additions and 6 deletions


@@ -37,10 +37,10 @@ dotenv.config()
 // These defaults are known to work fine in GitHub Actions.
 // For local development, you can override these in your local .env file.
 // For example:
-// echo 'BUILD_RECORDS_MAX_CONCURRENT=20' >> .env
-// echo 'BUILD_RECORDS_MIN_TIME=50' >> .env
-const MAX_CONCURRENT = parseInt(process.env.BUILD_RECORDS_MAX_CONCURRENT || '100', 10)
-const MIN_TIME = parseInt(process.env.BUILD_RECORDS_MIN_TIME || '5', 10)
+// echo 'BUILD_RECORDS_MAX_CONCURRENT=5' >> .env
+// echo 'BUILD_RECORDS_MIN_TIME=200' >> .env
+const MAX_CONCURRENT = parseInt(process.env.BUILD_RECORDS_MAX_CONCURRENT || '5', 10)
+const MIN_TIME = parseInt(process.env.BUILD_RECORDS_MIN_TIME || '200', 10)
 // These products, forcibly always get a popularity of 0 independent of
 // their actual popularity which comes from an external JSON file.

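The two constants read like Bottleneck limiter options: maxConcurrent caps in-flight jobs, and minTime is the minimum gap in milliseconds between job starts. A minimal sketch of how they might be wired up, assuming a Bottleneck-style limiter (the limiter and fetchAll names below are hypothetical, not from this diff):

// Sketch only: assumes the build script throttles with Bottleneck.
import Bottleneck from 'bottleneck'

const MAX_CONCURRENT = parseInt(process.env.BUILD_RECORDS_MAX_CONCURRENT || '5', 10)
const MIN_TIME = parseInt(process.env.BUILD_RECORDS_MIN_TIME || '200', 10)

// At most MAX_CONCURRENT requests in flight, and at least MIN_TIME ms
// between the start of one job and the next.
const limiter = new Bottleneck({ maxConcurrent: MAX_CONCURRENT, minTime: MIN_TIME })

// Hypothetical usage: route every record fetch through the limiter so the
// search backend never sees a burst larger than the configured ceiling.
async function fetchAll(urls: string[]): Promise<Response[]> {
  return Promise.all(urls.map((url) => limiter.schedule(() => fetch(url))))
}

Dropping the defaults from 100 concurrent / 5 ms to 5 concurrent / 200 ms trades indexing speed for a much gentler request pattern, which is the flakiness fix the commit title describes.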

@@ -62,7 +62,7 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
   if (opts.json) {
     try {
-      const response = await fetchWithRetry(page.url!, undefined, { retries: 3 })
+      const response = await fetchWithRetry(page.url!, undefined, { retries: 3, timeout: 60000 })
       if (!response.ok) {
         throw new HTTPError(
           `HTTP ${response.status}: ${response.statusText}`,
@@ -74,11 +74,14 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
       const pageCopy = Object.assign({}, page, { json })
       emitter.emit('page', pageCopy)
     } catch (err) {
+      if (err instanceof Error) {
+        err.message = `Failed to fetch ${page.url}: ${err.message}`
+      }
       emitter.emit('error', err)
     }
   } else {
     try {
-      const response = await fetchWithRetry(page.url!, undefined, { retries: 3 })
+      const response = await fetchWithRetry(page.url!, undefined, { retries: 3, timeout: 60000 })
       if (!response.ok) {
         throw new HTTPError(
           `HTTP ${response.status}: ${response.statusText}`,
@@ -91,6 +94,9 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp
       if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body)
       emitter.emit('page', pageCopy)
     } catch (err) {
+      if (err instanceof Error) {
+        err.message = `Failed to fetch ${page.url}: ${err.message}`
+      }
       emitter.emit('error', err)
     }
   }
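Two things are worth noting in this second file. The added catch blocks mutate err.message in place rather than wrapping the error in a new one, which preserves the original stack trace while prepending the failing URL. And fetchWithRetry itself is not part of the diff; a minimal sketch with the same call shape, assuming retries means additional attempts after the first and timeout is a per-attempt cap in milliseconds enforced via AbortSignal.timeout (Node 17.3+); the repo's real helper may differ:

// Sketch only: the repo's actual fetchWithRetry is not shown in this diff.
interface RetryOptions {
  retries?: number // additional attempts after the first (assumed semantics)
  timeout?: number // per-attempt cap in milliseconds (assumed semantics)
}

async function fetchWithRetry(
  url: string,
  init?: RequestInit,
  { retries = 3, timeout = 60000 }: RetryOptions = {},
): Promise<Response> {
  let lastError: unknown = undefined
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      // Abort any attempt that hangs longer than `timeout` ms.
      return await fetch(url, { ...init, signal: AbortSignal.timeout(timeout) })
    } catch (err) {
      lastError = err
    }
  }
  throw lastError
}

Under those semantics, a permanently hung endpoint now fails in bounded time (at most four attempts of 60 s each) instead of stalling the scrape indefinitely, which is the other half of the flakiness fix.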