1
0
mirror of synced 2025-12-23 11:54:18 -05:00

LinkChecker and better linksToSkip function (#23001)

* LinkChecker and better linksToSkip function

Part of #1253

* try now

* escaped

* make sure it skips all non-english links

* feedbacked
This commit is contained in:
Peter Bengtsson
2021-11-30 16:39:39 -05:00
committed by GitHub
parent e59c79acd0
commit 2f6f076cc0
3 changed files with 33 additions and 10 deletions

View File

@@ -1,18 +1,28 @@
// Linkinator treats the following as regex. /**
* This file exports a mix of strings and of regexes. Linkinator relies
* on this in `script/check-english-links.js` when we encounter external
* links that we *specifically ignore*. That means, that URLs or patterns
* mentioned in this file might appear within our content but we don't
* bother checking that they actually work.
*/
/* eslint-disable prefer-regex-literals */
export default [ export default [
// Skip GitHub search links. // Skip GitHub search links.
'https://github.com/search\\?', // E.g. https://github.com/search?foo=bar
'https://github.com/github/gitignore/search\\?', new RegExp('https://github\\.com/search\\?'),
new RegExp('https://github\\.com/github/gitignore/search\\?'),
// These links require auth. // These links require auth.
'https://github.com/settings/profile', new RegExp('https://github\\.com/settings/profile'),
'https://github.com/github/docs/edit', new RegExp('https://github\\.com/github/docs/edit'),
'https://github.com/github/insights-releases/releases/latest', new RegExp('https://github\\.com/github/insights-releases/releases/latest'),
'https://classroom.github.com/videos', new RegExp('https://classroom\\.github.com/videos'),
// Oneoff links that link checkers think are broken but are not. // Oneoff links that link checkers think are broken but are not.
'https://haveibeenpwned.com/', 'https://haveibeenpwned.com/',
'https://www.ilo.org/dyn/normlex/en/f\\?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029', 'https://www.ilo.org/dyn/normlex/en/f?p=NORMLEXPUB:12100:0::NO::P12100_ILO_CODE:P029',
'https://www.linkedin.com/company/github', 'https://www.linkedin.com/company/github',
'https://www.facebook.com/', 'https://www.facebook.com/',
'https://ko-fi.com/', 'https://ko-fi.com/',

View File

@@ -52,7 +52,7 @@ program
// Skip non-English content. // Skip non-English content.
const languagesToSkip = Object.keys(libLanguages) const languagesToSkip = Object.keys(libLanguages)
.filter((code) => code !== 'en') .filter((code) => code !== 'en')
.map((code) => `${root}/${code}`) .map((code) => new RegExp(`${root}/${code}`))
// Skip deprecated Enterprise content. // Skip deprecated Enterprise content.
// Capture the old format https://docs.github.com/enterprise/2.1/ // Capture the old format https://docs.github.com/enterprise/2.1/
@@ -66,7 +66,19 @@ const config = {
recurse: !program.opts().dryRun, recurse: !program.opts().dryRun,
silent: true, silent: true,
// The values in this array are treated as regexes. // The values in this array are treated as regexes.
linksToSkip: [enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks], linksToSkip: linksToSkipFactory([enterpriseReleasesToSkip, ...languagesToSkip, ...excludedLinks]),
}
// Return a predicate `(href) => boolean` that decides, as quickly as
// possible, whether a given href should be skipped.
//
// `regexAndURLs` is a mixed array of plain strings and `RegExp` objects:
//   - strings are matched exactly (whole-href equality) via a `Set`
//   - `RegExp` objects are matched with `.test(href)`
//   - entries of any other type are ignored
//
// We do this, instead of handing Linkinator the array directly, because
// Linkinator's default handling of an array `linksToSkip` config turns
// each entry into a new regex for every single URL it checks.
function linksToSkipFactory(regexAndURLs) {
  const exactURLs = new Set(regexAndURLs.filter((regexOrURL) => typeof regexOrURL === 'string'))
  const regexes = regexAndURLs.filter((regexOrURL) => regexOrURL instanceof RegExp)
  return (href) =>
    exactURLs.has(href) ||
    regexes.some((regex) => {
      // Regexes with the `g` or `y` flag remember `lastIndex` between
      // `.test()` calls. Without a reset, the same href could match on one
      // call and fail on the next, since this predicate is reused for
      // every link.
      if (regex.global || regex.sticky) regex.lastIndex = 0
      return regex.test(href)
    })
}
main() main()

View File

@@ -79,6 +79,7 @@ const IGNORE_PATHS = [
'.vscode', // Not part of the repo but could be for a developer locally '.vscode', // Not part of the repo but could be for a developer locally
'node_modules', 'node_modules',
'translations', 'translations',
'.linkinator',
'**/*.png', // Do not check images or font files. '**/*.png', // Do not check images or font files.
'**/*.jpg', // We could just put all of assets/* here, but that would prevent any '**/*.jpg', // We could just put all of assets/* here, but that would prevent any
'**/*.gif', // READMEs or other text-based files from being checked. '**/*.gif', // READMEs or other text-based files from being checked.