#!/usr/bin/env node

// [start-readme]
//
// This script goes through all content, renders each page's HTML and, from
// there, analyzes it for various flaws (e.g. broken links)
//
// [end-readme]

import fs from 'fs'
import path from 'path'
import cheerio from 'cheerio'
import { program, Option, InvalidArgumentError } from 'commander'
import chalk from 'chalk'
import got, { RequestError } from 'got'

import shortVersions from '../middleware/contextualizers/short-versions.js'
import contextualize from '../middleware/context.js'
import { languageKeys } from '../lib/languages.js'
import getRedirect from '../lib/get-redirect.js'
import warmServer from '../lib/warm-server.js'
import renderContent from '../lib/render-content/index.js'
import { deprecated } from '../lib/enterprise-server-releases.js'
import excludedLinks from '../lib/excluded-links.js'

const STATIC_PREFIXES = {
  assets: path.resolve('assets'),
  public: path.resolve(path.join('data', 'graphql')),
}
// Sanity check that these are valid paths
Object.entries(STATIC_PREFIXES).forEach(([key, value]) => {
  if (!fs.existsSync(value)) {
    throw new Error(`Can't find static prefix (${key}): ${value}`)
  }
})
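
// With these prefixes, a pathname like '/assets/images/foo.png' (an
// illustrative path) resolves to <repo>/assets/images/foo.png on disk, and a
// '/public/...' path to a file under data/graphql.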

// Return a function that can as quickly as possible check if a certain
// href input should be skipped.
// Do this so we can use a `Set` and an `iterable.some()` for a speedier
// check.
function linksToSkipFactory() {
  const set = new Set(excludedLinks.filter((regexOrURL) => typeof regexOrURL === 'string'))
  const regexes = excludedLinks.filter((regexOrURL) => regexOrURL instanceof RegExp)
  return (href) => set.has(href) || regexes.some((regex) => regex.test(href))
}
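
// An illustrative sketch of how the exclusions behave (the real entries live
// in ../lib/excluded-links.js): given a string entry 'https://example.com/exact'
// and a RegExp entry /^https:\/\/localhost(:|\/)/ (both made up here),
//
//   linksToSkip('https://example.com/exact')   // => true  (Set hit)
//   linksToSkip('https://localhost:4000/en')   // => true  (RegExp match)
//   linksToSkip('https://github.com/about')    // => false (assuming it's not excluded)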

const linksToSkip = linksToSkipFactory(excludedLinks)

const CONTENT_ROOT = path.resolve('content')

const deprecatedVersionPrefixesRegex = new RegExp(
  `enterprise(-server@|/)(${deprecated.join('|')})(/|$)`
)
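
// For example, if '2.13' were in the deprecated list (an illustrative value;
// the real list comes from ../lib/enterprise-server-releases.js), this regex
// matches pathnames like '/en/enterprise-server@2.13/admin' and
// '/en/enterprise/2.13/user', so links into deprecated versions are not
// reported as broken.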

program
  .description('Analyze all checked content files, render them, and check for flaws.')
  .addOption(
    new Option('-L, --level <LEVEL>', 'Filter of flaw level').choices([
      'all',
      'warning',
      'critical',
    ])
  )
  .addOption(
    new Option('-l, --language <LANGUAGE...>', 'Which languages to focus on').choices(languageKeys)
  )
  .option('--verbose-url <BASE_URL>', 'Print the absolute URL if set')
  .option('-f, --filter <FILTER...>', 'Search filter(s) on the paths')
  .option('-e, --exit', 'Exit script by count of flaws (useful for CI)')
  .option('-b, --bail', 'Exit on the first flaw')
  .option('--check-anchors', "Validate links that start with a '#' too")
  .option('--check-images', 'Validate local images too')
  .option('--check-external-links', 'Check external URLs too')
  .option('-v, --verbose', 'Verbose outputs')
  .option('--debug', "Loud about everything it's doing")
  .option('--random', 'Load pages in a random order (useful for debugging)')
  .option('--patient', 'Give external link checking longer timeouts and more retries')
  .option('-o, --out <file>', 'Put warnings and errors into a file instead of stdout')
  .option('--json-output', 'Print JSON to stdout or file instead')
  .option('--max <number>', 'integer argument (default: none)', (value) => {
    const parsed = parseInt(value, 10)
    if (isNaN(parsed)) {
      throw new InvalidArgumentError('Not a number.')
    }
    return parsed
  })
  .option(
    '--list <file>.json',
    'JSON file containing an array of specific files to check (default: none)',
    (filePath) => {
      const resolvedPath = path.resolve(filePath)

      let stats
      try {
        stats = fs.statSync(resolvedPath)
      } catch (error) {
        // Ignore
      }

      if (!stats || !stats.isFile()) {
        throw new InvalidArgumentError('Not an existing file.')
      }

      return resolvedPath
    }
  )
  .arguments('[files...]', 'Specific files to check')
  .parse(process.argv)
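
// A few illustrative invocations (the script path placeholder below stands in
// for wherever this file is checked in; the flags are the ones defined above):
//
//   <this-script> --language en --max 100
//   <this-script> --check-external-links --patient --filter actions
//   <this-script> --json-output --out /tmp/flaws.json --list /tmp/files.json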

main(program.opts(), program.args)

async function main(opts, files) {
  const {
    random,
    language,
    filter,
    exit,
    debug,
    max,
    verbose,
    list,
    checkExternalLinks,
    jsonOutput,
    out,
  } = opts

  // Note! The reason we're using `warmServer()` in this script,
  // even though there's no server involved, is that
  // the `contextualize()` function calls it.
  // And because `warmServer()` is idempotent, it's cheap to call
  // more than once; doing the warm-up here means the work isn't
  // repeated later.
  // If we manually did the same operations that `warmServer()` does
  // here (e.g. `loadPageMap()`), we'd end up having to do it all over
  // again the next time `contextualize()` is called.
  const { redirects, pages: pageMap, pageList } = await warmServer()

  const languages = language || []
  console.assert(Array.isArray(languages), `${languages} is not an array`)
  const filters = filter || []
  console.assert(Array.isArray(filters), `${filters} is not an array`)

  if (list && Array.isArray(files) && files.length > 0) {
    throw new InvalidArgumentError('Cannot specify both --list and a file list.')
  }

  if (list) {
    const fileList = JSON.parse(await fs.promises.readFile(list))
    if (Array.isArray(fileList) && fileList.length > 0) {
      files = fileList
    } else {
      // This must be allowed for empty PRs that accompany docs-early-access repo PRs
      console.warn('No files found in --list. Exiting...')
      process.exit(0)
    }
  }

  if (random) {
    shuffle(pageList)
  }

  debug && console.time('getPages')
  const pages = getPages(pageList, languages, filters, files, max)
  debug && console.timeEnd('getPages')

  if (checkExternalLinks && pages.length >= 100) {
    console.warn(
      chalk.yellow(
        `Warning! Checking external URLs can be time costly. You're testing ${pages.length} pages.`
      )
    )
  }

  const processPagesStart = new Date()
  const flawsGroups = await Promise.all(
    pages.map((page) => processPage(page, pageMap, redirects, opts))
  )
  const processPagesEnd = new Date()
  const flaws = flawsGroups.flat()
  if (jsonOutput) {
    jsonPrintFlaws(flaws, opts)
  }

  debug && printGlobalCacheHitRatio()

  if (verbose) {
    summarizeCounts(pages)

    console.log(`Checked ${(globalCacheHitCount + globalCacheMissCount).toLocaleString()} links`)
    console.log(`Took ${getDurationString(processPagesStart, processPagesEnd)}`)

    summarizeFlaws(flaws)
    if (out && flaws.length > 0) {
      console.log(`All flaws written to ${chalk.bold(out)}`)
    }
  }

  if (exit) {
    process.exit(flaws.length)
  }
}

function printGlobalCacheHitRatio() {
  const hits = globalCacheHitCount
  const misses = globalCacheMissCount
  // It could be that the files that were tested didn't have a single
  // link in them. In that case, there are no cache misses or hits at all.
  // So avoid the division by zero.
  if (misses + hits) {
    console.log(
      `Cache hit ratio: ${hits.toLocaleString()} of ${(misses + hits).toLocaleString()} (${(
        (100 * hits) /
        (misses + hits)
      ).toFixed(1)}%)`
    )
  }
}

function getDurationString(date1, date2) {
  const seconds = (date2.getTime() - date1.getTime()) / 1000
  const minutes = seconds / 60
  if (minutes > 1) {
    return `${minutes.toFixed(1)} minutes`
  }
  return `${seconds.toFixed(1)} seconds`
}
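
// For example, a 45 second run comes out as "45.0 seconds" and a 90 second
// run as "1.5 minutes".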

function getPages(pageList, languages, filters, files, max) {
  return pageList
    .filter((page) => {
      if (languages.length && !languages.includes(page.languageCode)) {
        return false
      }

      if (filters.length && !filters.find((filter) => page.relativePath.includes(filter))) {
        return false
      }

      if (
        files.length &&
        // The reason for checking each file against the `relativePath`
        // or the `fullPath` is to make it flexible for the user.
        !files.find((file) => {
          if (page.relativePath === file) return true
          if (page.fullPath === file) return true
          // The `page.relativePath` will always be *from* the containing
          // directory it came from and might not be relative to the repo
          // root. I.e.
          // `content/education/quickstart.md` is the path relative to
          // the repo root. But the `page.relativePath` will
          // in this case be `education/quickstart.md`.
          // So give it one last chance to relate to the repo root.
          // This is important because you might use `git diff --name-only`
          // to get the list of files to focus specifically on.
          if (path.join(CONTENT_ROOT, page.relativePath) === path.resolve(file)) return true
          return false
        })
      ) {
        return false
      }

      return true
    })
    .slice(0, max ? Math.min(max, pageList.length) : pageList.length)
}
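
// For example, you could (hypothetically) pass the output of
// `git diff --name-only main` as the [files...] arguments; a changed file like
// 'content/education/quickstart.md' then matches via the CONTENT_ROOT-joined
// comparison above even though the page's own relativePath is
// 'education/quickstart.md'.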

async function processPage(page, pageMap, redirects, opts) {
  const { bail, verboseUrl, jsonOutput, out } = opts

  const allFlawsEach = await Promise.all(
    page.permalinks.map((permalink) => processPermalink(permalink, page, pageMap, redirects, opts))
  )

  const allFlaws = allFlawsEach.flat()

  if (bail && allFlaws.length > 0) {
    if (jsonOutput) {
      jsonPrintFlaws(allFlaws, opts)
    } else {
      printFlaws(allFlaws, { verboseUrl, out })
    }
    process.exit(1)
  }

  if (!jsonOutput) {
    printFlaws(allFlaws, { verboseUrl, out })
  }

  return allFlaws
}

async function processPermalink(permalink, page, pageMap, redirects, opts) {
  const { level, checkAnchors, checkImages, checkExternalLinks, verbose, patient } = opts
  const html = await renderInnerHTML(page, permalink)
  const $ = cheerio.load(html)
  const flaws = []
  const links = []
  $('a[href]').each((i, link) => {
    links.push(link)
  })
  const newFlaws = await Promise.all(
    links.map(async (link) => {
      const { href } = link.attribs

      // The global cache can't be used for anchor links because they
      // depend on the particular page they are rendered on
      if (!href.startsWith('#')) {
        if (globalHrefCheckCache.has(href)) {
          globalCacheHitCount++
          return globalHrefCheckCache.get(href)
        }
        globalCacheMissCount++
      }

      const flaw = await checkHrefLink(
        href,
        $,
        redirects,
        pageMap,
        checkAnchors,
        checkExternalLinks,
        { verbose, patient }
      )

      if (flaw) {
        if (level === 'critical' && !flaw.CRITICAL) {
          return
        }
        const text = $(link).text()
        if (!href.startsWith('#')) {
          globalHrefCheckCache.set(href, { href, flaw, text })
        }
        return { href, flaw, text }
      } else {
        if (!href.startsWith('#')) {
          globalHrefCheckCache.set(href, flaw)
        }
      }
    })
  )
  for (const flaw of newFlaws) {
    if (flaw) {
      flaws.push(Object.assign(flaw, { page, permalink }))
    }
  }

  if (checkImages) {
    $('img[src]').each((i, img) => {
      let { src } = img.attribs

      // Images get a cache-busting prefix injected into their URL,
      // e.g. <img src="/assets/cb-123456/foo/bar.png">
      // We need to remove that otherwise we can't look up the image
      // on disk.
      src = src.replace(/\/cb-\d+\//, '/')

      if (globalImageSrcCheckCache.has(src)) {
        globalCacheHitCount++
        return globalImageSrcCheckCache.get(src)
      }

      const flaw = checkImageSrc(src, $)

      globalImageSrcCheckCache.set(src, flaw)

      if (flaw) {
        if (level === 'critical' && !flaw.CRITICAL) {
          return
        }
        flaws.push({ permalink, page, src, flaw })
      }
    })
  }

  return flaws
}

function jsonPrintFlaws(flaws, { verboseUrl = null, out = null } = {}) {
  const printableFlaws = {}
  for (const { page, permalink, href, text, src, flaw } of flaws) {
    const fullPath = prettyFullPath(page.fullPath)

    if (!(fullPath in printableFlaws)) {
      printableFlaws[fullPath] = []
    }
    if (href) {
      printableFlaws[fullPath].push({
        href,
        url: verboseUrl ? new URL(permalink.href, verboseUrl).toString() : permalink.href,
        text,
        flaw,
      })
    } else if (src) {
      printableFlaws[fullPath].push({
        src,
      })
    }
  }
  const message = JSON.stringify(printableFlaws, undefined, 2)
  if (out) {
    fs.writeFileSync(out, message + '\n', 'utf-8')
  } else {
    console.log(message)
  }
}

function printFlaws(flaws, { verboseUrl = null, out = null } = {}) {
  let previousPage = null
  let previousPermalink = null

  function fout(msg) {
    if (out) {
      fs.appendFileSync(out, `${msg}\n`, 'utf-8')
    } else {
      console.log(msg)
    }
  }

  for (const { page, permalink, href, text, src, flaw } of flaws) {
    const fullPath = prettyFullPath(page.fullPath)
    if (page !== previousPage) {
      if (out) {
        fout(`PAGE: ${fullPath}`)
      } else {
        console.log(`PAGE: ${chalk.bold(fullPath)}`)
      }
    }
    previousPage = page

    if (href) {
      if (previousPermalink !== permalink.href) {
        if (verboseUrl) {
          fout(` URL: ${new URL(permalink.href, verboseUrl).toString()}`)
        } else {
          fout(` PERMALINK: ${permalink.href}`)
        }
      }
      previousPermalink = permalink.href

      if (out) {
        fout(` HREF: ${href}`)
      } else {
        console.log(` HREF: ${chalk.bold(href)}`)
      }
      fout(` TEXT: ${text}`)
    } else if (src) {
      if (out) {
        fout(` IMG SRC: ${src}`)
      } else {
        console.log(` IMG SRC: ${chalk.bold(src)}`)
      }
    } else {
      throw new Error("Flaw has neither 'href' nor 'src'")
    }

    if (out) {
      fout(` FLAW: ${flaw.CRITICAL ? flaw.CRITICAL : flaw.WARNING}`)
    } else {
      console.log(
        ` FLAW: ${flaw.CRITICAL ? chalk.red(flaw.CRITICAL) : chalk.yellow(flaw.WARNING)}`
      )
    }
    fout('')
  }
}

// Given a full path, change it so it's relative to `cwd()` so that you
// can take it from the output and paste it after something like `code ...here...`
// The problem with displaying the full path is that it's quite noisy and
// takes up a lot of space. Sure, you can copy and paste it in front of
// `vi` or `ls` or `code`, but if we display it relative to `cwd()` you
// can still paste it to the next command and it's not taking up so much
// space.
function prettyFullPath(fullPath) {
  return path.relative(process.cwd(), fullPath)
}

const globalHrefCheckCache = new Map()
const globalImageSrcCheckCache = new Map()
let globalCacheHitCount = 0
let globalCacheMissCount = 0
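
// These caches are shared across all pages in a single run: href results are
// keyed by the raw href (anchor links are deliberately not cached, see
// processPermalink), and image results by the cache-bust-stripped src. The
// hit/miss counters feed printGlobalCacheHitRatio() above.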

async function checkHrefLink(
  href,
  $,
  redirects,
  pageMap,
  checkAnchors = false,
  checkExternalLinks = false,
  { verbose = false, patient = false } = {}
) {
  if (href === '#') {
    if (checkAnchors) {
      return { WARNING: 'Link is just an empty `#`' }
    }
  } else if (href.startsWith('#')) {
    if (checkAnchors) {
      const countDOMItems = $(href).length
      if (countDOMItems !== 1) {
        return { WARNING: `Anchor ${href} doesn't match exactly one element on the page` }
      }
    }
  } else if (href.startsWith('/')) {
    const pathname = new URL(href, 'http://example.com').pathname

    // Remember, if the Markdown has something like
    //
    // See [my link](/some/page/)
    //
    // In the post-processing, that will actually become
    //
    // See <a href="/en/some/page">my link</a>
    //
    // But, if that link was a redirect, it would have been left
    // untouched.
    if (pathname.endsWith('/')) {
      return { WARNING: 'Links with a trailing / will always redirect' }
    } else {
      if (pathname.split('/')[1] in STATIC_PREFIXES) {
        const staticFilePath = path.join(
          STATIC_PREFIXES[pathname.split('/')[1]],
          pathname.split(path.sep).slice(2).join(path.sep)
        )
        if (!fs.existsSync(staticFilePath)) {
          return { CRITICAL: `Static file not found ${staticFilePath} (${pathname})` }
        }
      } else if (getRedirect(pathname, { redirects, pages: pageMap })) {
        return { WARNING: `Redirect to ${getRedirect(pathname, { redirects, pages: pageMap })}` }
      } else if (!pageMap[pathname]) {
        if (deprecatedVersionPrefixesRegex.test(pathname)) {
          return
        }

        return { CRITICAL: 'Broken link' }
      }
    }
  } else if (checkExternalLinks) {
    if (!href.startsWith('https://')) {
      return { WARNING: `Will not check external URLs that are not HTTPS (${href})` }
    }
    if (linksToSkip(href)) {
      return
    }
    const { ok, ...info } = await checkExternalURL(href, { verbose, patient })
    if (!ok) {
      return { CRITICAL: `Broken external link (${JSON.stringify(info)})` }
    }
  }
}
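
// The return value (as consumed by processPermalink above) is either undefined
// when the link is fine or intentionally skipped, or an object shaped like
// { WARNING: '...' } or { CRITICAL: '...' }. For example (illustrative path):
//
//   await checkHrefLink('/en/some/removed/page', $, redirects, pageMap)
//   // => { CRITICAL: 'Broken link' }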

const _fetchCache = new Map()
async function checkExternalURL(url, { verbose = false, patient = false } = {}) {
  if (!url.startsWith('https://')) throw new Error('Invalid URL')
  const cleanURL = url.split('#')[0]
  if (!_fetchCache.has(cleanURL)) {
    _fetchCache.set(cleanURL, innerFetch(cleanURL, { verbose, patient }))
  }
  return _fetchCache.get(cleanURL)
}
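
// Note that the cache stores the *promise* returned by innerFetch, keyed by
// the URL without its fragment. So, for example, 'https://example.com/a#foo'
// and 'https://example.com/a#bar' (illustrative URLs) share a single network
// request, and concurrent callers simply await the same in-flight promise.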

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms))

// Global for recording which domains we get rate-limited on.
// For example, if you got rate limited on `something.github.com/foo`
// and now we're asked to fetch `something.github.com/bar`,
// it's good to know not to bother yet.
const _rateLimitedDomains = new Map()

async function innerFetch(url, config = {}) {
  const { verbose, useGET, patient } = config

  const { hostname } = new URL(url)
  if (_rateLimitedDomains.has(hostname)) {
    await sleep(_rateLimitedDomains.get(hostname))
  }
  // The way `got` does retries:
  //
  // sleep = 1000 * Math.pow(2, retry - 1) + Math.random() * 100
  //
  // So, it means:
  //
  // 1. ~1000ms
  // 2. ~2000ms
  // 3. ~4000ms
  //
  // ...if the limit we set is 3.
  // Our own timeout, in ./middleware/timeout.js, defaults to 10 seconds.
  // So there's no point in trying more than 3 attempts because it would
  // just time out on the 10s. (i.e. 1000 + 2000 + 4000 + 8000 > 10,000)
  const retry = {
    limit: patient ? 5 : 2,
  }
  const timeout = { request: patient ? 10000 : 2000 }

  const headers = {
    'User-Agent':
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36',
  }

  const retries = config.retries || 0
  const httpFunction = useGET ? got.get : got.head

  if (verbose) console.log(`External URL ${useGET ? 'GET' : 'HEAD'}: ${url} (retries: ${retries})`)
  try {
    const r = await httpFunction(url, {
      headers,
      throwHttpErrors: false,
      retry,
      timeout,
    })
    if (verbose) {
      console.log(
        `External URL ${useGET ? 'GET' : 'HEAD'} ${url}: ${r.statusCode} (retries: ${retries})`
      )
    }

    // If we get rate limited, remember that this hostname is now all
    // rate limited. And sleep for the number of seconds that the
    // `retry-after` header indicated.
    if (r.statusCode === 429) {
      let sleepTime = Math.min(
        60_000,
        Math.max(10_000, getRetryAfterSleep(r.headers['retry-after']))
      )
      // Sprinkle in a little jitter so it doesn't all start again
      // at the same time
      sleepTime += Math.random() * 10 * 1000
      // Give it a bit extra when we can be really patient
      if (patient) sleepTime += 30 * 1000

      _rateLimitedDomains.set(hostname, sleepTime + Math.random() * 10 * 1000)
      if (verbose)
        console.log(
          chalk.yellow(
            `Rate limited on ${hostname} (${url}). Sleeping for ${(sleepTime / 1000).toFixed(1)}s`
          )
        )
      await sleep(sleepTime)
      return innerFetch(url, Object.assign({}, config, { retries: retries + 1 }))
    } else {
      _rateLimitedDomains.delete(hostname)
    }

    // Perhaps the server doesn't support HEAD requests.
    // If so, try again with a regular GET.
    if ((r.statusCode === 405 || r.statusCode === 404) && !useGET) {
      return innerFetch(url, Object.assign({}, config, { useGET: true }))
    }
    if (verbose) {
      console.log((r.ok ? chalk.green : chalk.red)(`${r.statusCode} on ${url}`))
    }
    return { ok: r.ok, statusCode: r.statusCode }
  } catch (err) {
    if (err instanceof RequestError) {
      if (verbose) {
        console.log(chalk.yellow(`RequestError (${err.message}) on ${url}`))
      }
      return { ok: false, requestError: err.message }
    }
    throw err
  }
}

// Return number of milliseconds from a `Retry-After` header value
function getRetryAfterSleep(headerValue) {
  if (!headerValue) return 0
  let ms = Math.round(parseFloat(headerValue) * 1000)
  if (isNaN(ms)) {
    ms = Math.max(0, new Date(headerValue) - new Date())
  }
  return ms
}
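
// For example, 'Retry-After: 120' yields 120000 ms, while an HTTP-date value
// like 'Wed, 21 Oct 2015 07:28:00 GMT' yields however many milliseconds remain
// until that moment (or 0 if it's already in the past).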

function checkImageSrc(src) {
  const pathname = new URL(src, 'http://example.com').pathname
  if (!pathname.startsWith('/')) {
    return { WARNING: "External images can't be checked" }
  }
  const prefix = pathname.split('/')[1]
  if (prefix in STATIC_PREFIXES) {
    const staticFilePath = path.join(
      STATIC_PREFIXES[prefix],
      pathname.split(path.sep).slice(2).join(path.sep)
    )
    if (!fs.existsSync(staticFilePath)) {
      return { CRITICAL: `Static file not found (${pathname})` }
    }
  } else {
    return { WARNING: `Unrecognized image src prefix (${prefix})` }
  }
}
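
// For example, a src of '/assets/images/help/example.png' (an illustrative
// path) is looked up on disk under the resolved 'assets' prefix and flagged
// CRITICAL if the file doesn't exist, while a src whose first path segment
// isn't a known prefix gets the "Unrecognized image src prefix" warning.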

function summarizeFlaws(flaws) {
  if (flaws.length) {
    console.log(
      chalk.bold(
        `Found ${flaws.length.toLocaleString()} flaw${flaws.length === 1 ? '' : 's'} in total.`
      )
    )
  } else {
    console.log(chalk.green('No flaws found! 💖'))
  }
}

function summarizeCounts(pages) {
  const count = pages.map((page) => page.permalinks.length).reduce((a, b) => a + b, 0)
  console.log(
    `Tested ${count.toLocaleString()} permalinks across ${pages.length.toLocaleString()} pages`
  )
}

function shuffle(array) {
  let currentIndex = array.length
  let randomIndex

  // While there remain elements to shuffle...
  while (currentIndex !== 0) {
    // Pick a remaining element...
    randomIndex = Math.floor(Math.random() * currentIndex)
    currentIndex--

    // And swap it with the current element.
    ;[array[currentIndex], array[randomIndex]] = [array[randomIndex], array[currentIndex]]
  }

  return array
}

async function renderInnerHTML(page, permalink) {
  const next = () => {}
  const res = {}

  const pagePath = permalink.href
  const req = {
    path: pagePath,
    language: permalink.languageCode,
    pagePath,
    cookies: {},
  }
  await contextualize(req, res, next)
  await shortVersions(req, res, next)
  const context = Object.assign({}, req.context, { page })
  context.relativePath = page.relativePath
  return await renderContent(page.markdown, context)
}
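
// Roughly what a single call does: a minimal fake req (path, language, empty
// cookies) is run through the contextualize() and shortVersions() middleware
// to build up req.context, and renderContent() then turns the page's Markdown
// into the HTML that processPermalink() loads with cheerio.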

// Deliberately commented out. Kept temporarily in case it's better.
// async function renderPage(page, permalink) {
//   const next = () => {}
//   const res = {}
//   const pagePath = permalink.href
//   const req = {
//     path: pagePath,
//     language: permalink.languageCode,
//     pagePath,
//     cookies: {},
//   }
//   await contextualize(req, res, next)
//   const context = Object.assign({}, req.context, { page })
//   return await page._render(context)
// }