merge in domwaiter (#31197)
This commit is contained in:
@@ -8,7 +8,7 @@ on:
|
||||
merge_group:
|
||||
pull_request:
|
||||
paths:
|
||||
- script/search/index-elasticsearch.js
|
||||
- 'script/search/**'
|
||||
- 'package*.json'
|
||||
- .github/workflows/dry-run-elasticsearch-indexing.yml
|
||||
|
||||
|
||||
152
package-lock.json
generated
152
package-lock.json
generated
@@ -16,6 +16,7 @@
|
||||
"accept-language-parser": "^1.5.0",
|
||||
"ajv": "^8.11.0",
|
||||
"ajv-formats": "^2.1.1",
|
||||
"bottleneck": "2.19.5",
|
||||
"cheerio": "^1.0.0-rc.11",
|
||||
"classnames": "^2.3.1",
|
||||
"connect-datadog": "0.0.9",
|
||||
@@ -119,7 +120,6 @@
|
||||
"cross-env": "^7.0.3",
|
||||
"csp-parse": "0.0.2",
|
||||
"dedent": "^0.7.0",
|
||||
"domwaiter": "^1.4.0",
|
||||
"eslint": "8.24.0",
|
||||
"eslint-config-prettier": "^8.5.0",
|
||||
"eslint-config-standard": "^17.0.0",
|
||||
@@ -164,7 +164,6 @@
|
||||
"node": ">=16.x"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"bottleneck": "^2.19.5",
|
||||
"esm": "^3.2.25",
|
||||
"image-size": "^1.0.1",
|
||||
"jest-puppeteer": "^5.0.4",
|
||||
@@ -4108,18 +4107,6 @@
|
||||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@szmarczak/http-timer": {
|
||||
"version": "4.0.6",
|
||||
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz",
|
||||
"integrity": "sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"defer-to-connect": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/babel__core": {
|
||||
"version": "7.1.19",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.1.19.tgz",
|
||||
@@ -6240,8 +6227,8 @@
|
||||
},
|
||||
"node_modules/bottleneck": {
|
||||
"version": "2.19.5",
|
||||
"devOptional": true,
|
||||
"license": "MIT"
|
||||
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
|
||||
"integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="
|
||||
},
|
||||
"node_modules/boxen": {
|
||||
"version": "7.0.0",
|
||||
@@ -7573,64 +7560,6 @@
|
||||
"url": "https://github.com/fb55/domutils?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domwaiter": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/domwaiter/-/domwaiter-1.4.0.tgz",
|
||||
"integrity": "sha512-k7dIRmg5/wMsET8FFZvrlZ2A81WOjc9D5DcVVoZxkwvo2hMPklYXPiS23h3Ez7zqyp25pmEn3Hzjq8agPiRxiw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"bottleneck": "^2.19.5",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"got": "^11.8.5"
|
||||
}
|
||||
},
|
||||
"node_modules/domwaiter/node_modules/cacheable-lookup": {
|
||||
"version": "5.0.4",
|
||||
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-5.0.4.tgz",
|
||||
"integrity": "sha512-2/kNscPhpcxrOigMZzbiWF7dz8ilhb/nIHU3EyZiXWXpeq/au8qJ8VhdftMkty3n7Gj6HIGalQG8oiBNB3AJgA==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=10.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/domwaiter/node_modules/got": {
|
||||
"version": "11.8.5",
|
||||
"resolved": "https://registry.npmjs.org/got/-/got-11.8.5.tgz",
|
||||
"integrity": "sha512-o0Je4NvQObAuZPHLFoRSkdG2lTgtcynqymzg2Vupdx6PorhaT5MCbIyXG6d4D94kk8ZG57QeosgdiqfJWhEhlQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@sindresorhus/is": "^4.0.0",
|
||||
"@szmarczak/http-timer": "^4.0.5",
|
||||
"@types/cacheable-request": "^6.0.1",
|
||||
"@types/responselike": "^1.0.0",
|
||||
"cacheable-lookup": "^5.0.3",
|
||||
"cacheable-request": "^7.0.2",
|
||||
"decompress-response": "^6.0.0",
|
||||
"http2-wrapper": "^1.0.0-beta.5.2",
|
||||
"lowercase-keys": "^2.0.0",
|
||||
"p-cancelable": "^2.0.0",
|
||||
"responselike": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.19.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sindresorhus/got?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/domwaiter/node_modules/http2-wrapper": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-1.0.3.tgz",
|
||||
"integrity": "sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"quick-lru": "^5.1.1",
|
||||
"resolve-alpn": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.19.0"
|
||||
}
|
||||
},
|
||||
"node_modules/dot-case": {
|
||||
"version": "3.0.4",
|
||||
"dev": true,
|
||||
@@ -15723,15 +15652,6 @@
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/p-cancelable": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz",
|
||||
"integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/p-limit": {
|
||||
"version": "3.1.0",
|
||||
"dev": true,
|
||||
@@ -23418,15 +23338,6 @@
|
||||
"tslib": "^2.4.0"
|
||||
}
|
||||
},
|
||||
"@szmarczak/http-timer": {
|
||||
"version": "4.0.6",
|
||||
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-4.0.6.tgz",
|
||||
"integrity": "sha512-4BAffykYOgO+5nzBWYwE3W90sBgLJoUPRWWcL8wlyiM8IB8ipJz3UMJ9KXQd1RKQXpKp8Tutn80HZtWsu2u76w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"defer-to-connect": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"@types/babel__core": {
|
||||
"version": "7.1.19",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.1.19.tgz",
|
||||
@@ -25093,7 +25004,8 @@
|
||||
},
|
||||
"bottleneck": {
|
||||
"version": "2.19.5",
|
||||
"devOptional": true
|
||||
"resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz",
|
||||
"integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="
|
||||
},
|
||||
"boxen": {
|
||||
"version": "7.0.0",
|
||||
@@ -25950,54 +25862,6 @@
|
||||
"domhandler": "^5.0.1"
|
||||
}
|
||||
},
|
||||
"domwaiter": {
|
||||
"version": "1.4.0",
|
||||
"resolved": "https://registry.npmjs.org/domwaiter/-/domwaiter-1.4.0.tgz",
|
||||
"integrity": "sha512-k7dIRmg5/wMsET8FFZvrlZ2A81WOjc9D5DcVVoZxkwvo2hMPklYXPiS23h3Ez7zqyp25pmEn3Hzjq8agPiRxiw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"bottleneck": "^2.19.5",
|
||||
"cheerio": "^1.0.0-rc.3",
|
||||
"got": "^11.8.5"
|
||||
},
|
||||
"dependencies": {
|
||||
"cacheable-lookup": {
|
||||
"version": "5.0.4",
|
||||
"resolved": "https://registry.npmjs.org/cacheable-lookup/-/cacheable-lookup-5.0.4.tgz",
|
||||
"integrity": "sha512-2/kNscPhpcxrOigMZzbiWF7dz8ilhb/nIHU3EyZiXWXpeq/au8qJ8VhdftMkty3n7Gj6HIGalQG8oiBNB3AJgA==",
|
||||
"dev": true
|
||||
},
|
||||
"got": {
|
||||
"version": "11.8.5",
|
||||
"resolved": "https://registry.npmjs.org/got/-/got-11.8.5.tgz",
|
||||
"integrity": "sha512-o0Je4NvQObAuZPHLFoRSkdG2lTgtcynqymzg2Vupdx6PorhaT5MCbIyXG6d4D94kk8ZG57QeosgdiqfJWhEhlQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@sindresorhus/is": "^4.0.0",
|
||||
"@szmarczak/http-timer": "^4.0.5",
|
||||
"@types/cacheable-request": "^6.0.1",
|
||||
"@types/responselike": "^1.0.0",
|
||||
"cacheable-lookup": "^5.0.3",
|
||||
"cacheable-request": "^7.0.2",
|
||||
"decompress-response": "^6.0.0",
|
||||
"http2-wrapper": "^1.0.0-beta.5.2",
|
||||
"lowercase-keys": "^2.0.0",
|
||||
"p-cancelable": "^2.0.0",
|
||||
"responselike": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"http2-wrapper": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/http2-wrapper/-/http2-wrapper-1.0.3.tgz",
|
||||
"integrity": "sha512-V+23sDMr12Wnz7iTcDeJr3O6AIxlnvT/bmaAAAP/Xda35C90p9599p0F1eHR/N1KILWSoWVAiOMFjBBXaXSMxg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"quick-lru": "^5.1.1",
|
||||
"resolve-alpn": "^1.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"dot-case": {
|
||||
"version": "3.0.4",
|
||||
"dev": true,
|
||||
@@ -31563,12 +31427,6 @@
|
||||
"version": "1.0.2",
|
||||
"devOptional": true
|
||||
},
|
||||
"p-cancelable": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz",
|
||||
"integrity": "sha512-BZOr3nRQHOntUjTrH8+Lh54smKHoHyur8We1V8DSMVrl5A2malOOwuJRnKRDjSnkoeBh4at6BwEnb5I7Jl31wg==",
|
||||
"dev": true
|
||||
},
|
||||
"p-limit": {
|
||||
"version": "3.1.0",
|
||||
"dev": true,
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
"accept-language-parser": "^1.5.0",
|
||||
"ajv": "^8.11.0",
|
||||
"ajv-formats": "^2.1.1",
|
||||
"bottleneck": "2.19.5",
|
||||
"cheerio": "^1.0.0-rc.11",
|
||||
"classnames": "^2.3.1",
|
||||
"connect-datadog": "0.0.9",
|
||||
@@ -121,7 +122,6 @@
|
||||
"cross-env": "^7.0.3",
|
||||
"csp-parse": "0.0.2",
|
||||
"dedent": "^0.7.0",
|
||||
"domwaiter": "^1.4.0",
|
||||
"eslint": "8.24.0",
|
||||
"eslint-config-prettier": "^8.5.0",
|
||||
"eslint-config-standard": "^17.0.0",
|
||||
@@ -169,7 +169,6 @@
|
||||
"license": "(MIT AND CC-BY-4.0)",
|
||||
"name": "docs.github.com",
|
||||
"optionalDependencies": {
|
||||
"bottleneck": "^2.19.5",
|
||||
"esm": "^3.2.25",
|
||||
"image-size": "^1.0.1",
|
||||
"jest-puppeteer": "^5.0.4",
|
||||
|
||||
55
script/domwaiter.js
Normal file
55
script/domwaiter.js
Normal file
@@ -0,0 +1,55 @@
|
||||
import { EventEmitter } from 'node:events'
|
||||
import Bottleneck from 'bottleneck'
|
||||
import got from 'got'
|
||||
import cheerio from 'cheerio'
|
||||
|
||||
/**
 * Fetch a list of pages through a rate-limited queue and report progress
 * through events on the returned emitter.
 *
 * Events emitted:
 *   - 'beforePageLoad' (page): right before a page is requested
 *   - 'page' (pageCopy): a copy of the page with `json`, or `body` (and `$`
 *     when `parseDOM` is on) attached
 *   - 'error' (err): a request failed, or the limiter reported an error
 *   - 'done': all scheduled work has finished (or there was nothing to do)
 *
 * @param {Array<{url: string}>} pages - Pages to fetch; each needs a `url`.
 * @param {object} [opts] - Overrides for the defaults below. The merged
 *   object is also handed to the Bottleneck constructor.
 * @param {boolean} [opts.parseDOM=true] - Attach a cheerio instance as `$`.
 * @param {boolean} [opts.json=false] - Parse responses as JSON instead of text.
 * @param {number} [opts.maxConcurrent=5] - Bottleneck concurrency limit.
 * @param {number} [opts.minTime=500] - Bottleneck minimum ms between jobs.
 * @returns {EventEmitter} Emitter on which the events above are fired.
 */
export default function domwaiter(pages, opts = {}) {
  const emitter = new EventEmitter()

  // With nothing to schedule the limiter never fires 'idle', so 'done' would
  // never be emitted and callers awaiting it would hang. Emit it ourselves,
  // deferred so the caller can attach listeners to the returned emitter first.
  if (pages.length === 0) {
    setImmediate(() => emitter.emit('done'))
    return emitter
  }

  // Build a fresh options object instead of mutating the defaults or
  // reassigning the parameter; caller-supplied keys win.
  const options = {
    parseDOM: true,
    json: false,
    maxConcurrent: 5,
    minTime: 500,
    ...opts,
  }

  const limiter = new Bottleneck(options)

  pages.forEach((page) => {
    // getPage reports its own failures through the emitter.
    limiter.schedule(getPage, page, emitter, options)
  })

  limiter
    .on('idle', () => {
      emitter.emit('done')
    })
    .on('error', (err) => {
      emitter.emit('error', err)
    })

  return emitter
}
|
||||
|
||||
/**
 * Request a single page and announce the outcome on `emitter`.
 *
 * Emits 'beforePageLoad' with the page first. On success emits 'page' with a
 * shallow copy of `page` carrying either `json` (when `opts.json` is truthy)
 * or `body`, plus a cheerio instance as `$` when `opts.parseDOM` is truthy.
 * Anything thrown while fetching, parsing, or emitting 'page' is emitted as
 * 'error' instead.
 *
 * @param {{url: string}} page - Page descriptor; `url` is what gets requested.
 * @param {EventEmitter} emitter - Receives the events described above.
 * @param {{json?: boolean, parseDOM?: boolean}} opts - Fetch/parse switches.
 * @returns {Promise<void>}
 */
async function getPage(page, emitter, opts) {
  emitter.emit('beforePageLoad', page)

  // Read the mode before entering the try block so a missing `opts` still
  // surfaces as a rejection rather than as an 'error' event.
  const wantsJson = opts.json

  try {
    const loaded = {}
    if (wantsJson) {
      loaded.json = await got(page.url).json()
    } else {
      const response = await got(page.url)
      loaded.body = response.body
    }

    const pageCopy = Object.assign({}, page, loaded)
    if (!wantsJson && opts.parseDOM) pageCopy.$ = cheerio.load(loaded.body)
    emitter.emit('page', pageCopy)
  } catch (err) {
    emitter.emit('error', err)
  }
}
|
||||
@@ -1,11 +1,12 @@
|
||||
#!/usr/bin/env node
|
||||
import domwaiter from 'domwaiter'
|
||||
import eventToPromise from 'event-to-promise'
|
||||
import chalk from 'chalk'
|
||||
import dotenv from 'dotenv'
|
||||
import parsePageSectionsIntoRecords from './parse-page-sections-into-records.js'
|
||||
import getPopularPages from './popular-pages.js'
|
||||
import languages from '../../lib/languages.js'
|
||||
import domwaiter from '../domwaiter.js'
|
||||
|
||||
const pageMarker = chalk.green('|')
|
||||
const recordMarker = chalk.grey('.')
|
||||
const port = 4002
|
||||
|
||||
Reference in New Issue
Block a user