diff --git a/lib/excluded-links.js b/lib/excluded-links.js index d676989446..f8a31e6fc5 100644 --- a/lib/excluded-links.js +++ b/lib/excluded-links.js @@ -1,18 +1,28 @@ -// Linkinator treats the following as regex. +/** + * This file exports a mix of strings and regexes. Linkinator relies + * on this in `script/check-english-links.js` when we encounter external + * links that we *specifically ignore*. That means that URLs or patterns + * mentioned in this file might appear within our content, but we don't + * bother checking that they actually work. + */ + +/* eslint-disable prefer-regex-literals */ + export default [ // Skip GitHub search links. - 'https://github.com/search\\?', - 'https://github.com/github/gitignore/search\\?', + // E.g. https://github.com/search?foo=bar + new RegExp('https://github\\.com/search\\?'), + new RegExp('https://github\\.com/github/gitignore/search\\?'), // These links require auth. - 'https://github.com/settings/profile', - 'https://github.com/github/docs/edit', - 'https://github.com/github/insights-releases/releases/latest', - 'https://classroom.github.com/videos', + new RegExp('https://github\\.com/settings/profile'), + new RegExp('https://github\\.com/github/docs/edit'), + new RegExp('https://github\\.com/github/insights-releases/releases/latest'), + new RegExp('https://classroom\\.github.com/videos'), // Oneoff links that link checkers think are broken but are not. 'https://haveibeenpwned.com/', - 'https://www.ilo.org/dyn/normlex/en/f\\?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029', + 'https://www.ilo.org/dyn/normlex/en/f?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029', 'https://www.linkedin.com/company/github', 'https://www.facebook.com/', 'https://ko-fi.com/', diff --git a/script/check-english-links.js b/script/check-english-links.js index 2ddb8c3cc8..96b6ad9c40 100755 --- a/script/check-english-links.js +++ b/script/check-english-links.js @@ -52,7 +52,7 @@ program // Skip non-English content. 
const languagesToSkip = Object.keys(libLanguages) .filter((code) => code !== 'en') - .map((code) => `${root}/${code}`) + .map((code) => new RegExp(`${root}/${code}`)) // Skip deprecated Enterprise content. // Capture the old format https://docs.github.com/enterprise/2.1/ @@ -66,7 +66,19 @@ const config = { recurse: !program.opts().dryRun, silent: true, // The values in this array are treated as regexes. - linksToSkip: [enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks], + linksToSkip: linksToSkipFactory([enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks]), +} + +// Return a function that can as quickly as possible check if a certain +// href input should be skipped. +// Do this so we can use a `Set` and an `iterable.some()` for a speedier +// check. In the default implementation in Linkinator, if you set +// the `linksToSkip` config to be an array, it will, for every URL it +// checks, turn that into a new regex every single time. +function linksToSkipFactory(regexAndURLs) { + const set = new Set(regexAndURLs.filter((regexOrURL) => typeof regexOrURL === 'string')) + const regexes = regexAndURLs.filter((regexOrURL) => regexOrURL instanceof RegExp) + return (href) => set.has(href) || regexes.some((regex) => regex.test(href)) } main() diff --git a/tests/meta/repository-references.js b/tests/meta/repository-references.js index b4bd706d42..82839218a7 100644 --- a/tests/meta/repository-references.js +++ b/tests/meta/repository-references.js @@ -79,6 +79,7 @@ const IGNORE_PATHS = [ '.vscode', // Not part of the repo but could be for a developer locally 'node_modules', 'translations', + '.linkinator', '**/*.png', // Do not check images or font files. '**/*.jpg', // We could just put all of assets/* here, but that would prevent any '**/*.gif', // READMEs or other text-based files from being checked.