diff --git a/src/search/scripts/scrape/lib/build-records.ts b/src/search/scripts/scrape/lib/build-records.ts index 479d3a06b0..b2f1d734b1 100644 --- a/src/search/scripts/scrape/lib/build-records.ts +++ b/src/search/scripts/scrape/lib/build-records.ts @@ -37,10 +37,10 @@ dotenv.config() // These defaults are known to work fine in GitHub Actions. // For local development, you can override these in your local .env file. // For example: -// echo 'BUILD_RECORDS_MAX_CONCURRENT=20' >> .env -// echo 'BUILD_RECORDS_MIN_TIME=50' >> .env -const MAX_CONCURRENT = parseInt(process.env.BUILD_RECORDS_MAX_CONCURRENT || '100', 10) -const MIN_TIME = parseInt(process.env.BUILD_RECORDS_MIN_TIME || '5', 10) +// echo 'BUILD_RECORDS_MAX_CONCURRENT=5' >> .env +// echo 'BUILD_RECORDS_MIN_TIME=200' >> .env +const MAX_CONCURRENT = parseInt(process.env.BUILD_RECORDS_MAX_CONCURRENT || '5', 10) +const MIN_TIME = parseInt(process.env.BUILD_RECORDS_MIN_TIME || '200', 10) // These products, forcibly always get a popularity of 0 independent of // their actual popularity which comes from an external JSON file. diff --git a/src/search/scripts/scrape/lib/domwaiter.ts b/src/search/scripts/scrape/lib/domwaiter.ts index 4dd2b12451..2dc28c4dc5 100644 --- a/src/search/scripts/scrape/lib/domwaiter.ts +++ b/src/search/scripts/scrape/lib/domwaiter.ts @@ -62,7 +62,7 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp if (opts.json) { try { - const response = await fetchWithRetry(page.url!, undefined, { retries: 3 }) + const response = await fetchWithRetry(page.url!, undefined, { retries: 3, timeout: 60000 }) if (!response.ok) { throw new HTTPError( `HTTP ${response.status}: ${response.statusText}`, @@ -74,11 +74,14 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp const pageCopy = Object.assign({}, page, { json }) emitter.emit('page', pageCopy) } catch (err) { + if (err instanceof Error) { + err.message = `Failed to fetch ${page.url}: ${err.message}` + } emitter.emit('error', err) } } else { try { - const response = await fetchWithRetry(page.url!, undefined, { retries: 3 }) + const response = await fetchWithRetry(page.url!, undefined, { retries: 3, timeout: 60000 }) if (!response.ok) { throw new HTTPError( `HTTP ${response.status}: ${response.statusText}`, @@ -91,6 +94,9 @@ async function getPage(page: Permalink, emitter: EventEmitter, opts: DomWaiterOp if (opts.parseDOM) (pageCopy as any).$ = cheerio.load(body) emitter.emit('page', pageCopy) } catch (err) { + if (err instanceof Error) { + err.message = `Failed to fetch ${page.url}: ${err.message}` + } emitter.emit('error', err) } }