From c95a5c9bdf3ae54ad77b074739b262e7d0e8a077 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Thu, 17 Nov 2022 14:19:32 +0100 Subject: [PATCH] Treat "failing" external link checks as warning (#32456) --- .../actions/rendered-content-link-checker.js | 51 +++++++++++++++++-- .github/workflows/link-check-daily.yml | 3 ++ lib/excluded-links.js | 1 - script/rendered-content-link-checker.js | 1 + 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/.github/actions/rendered-content-link-checker.js b/.github/actions/rendered-content-link-checker.js index d897ab0ba8..49e1f9f21a 100755 --- a/.github/actions/rendered-content-link-checker.js +++ b/.github/actions/rendered-content-link-checker.js @@ -77,8 +77,15 @@ const deprecatedVersionPrefixesRegex = new RegExp( // When this file is invoked directly from action as opposed to being imported if (import.meta.url.endsWith(process.argv[1])) { // Optional env vars - const { ACTION_RUN_URL, LEVEL, FILES_CHANGED, REPORT_REPOSITORY, REPORT_AUTHOR, REPORT_LABEL } = - process.env + const { + ACTION_RUN_URL, + LEVEL, + FILES_CHANGED, + REPORT_REPOSITORY, + REPORT_AUTHOR, + REPORT_LABEL, + EXTERNAL_SERVER_ERRORS_AS_WARNINGS, + } = process.env const octokit = github() @@ -113,6 +120,7 @@ if (import.meta.url.endsWith(process.argv[1])) { reportLabel: REPORT_LABEL, reportAuthor: REPORT_AUTHOR, actionContext: getActionContext(), + externalServerErrorsAsWarning: EXTERNAL_SERVER_ERRORS_AS_WARNINGS, } if (opts.shouldComment || opts.createReport) { @@ -155,6 +163,7 @@ if (import.meta.url.endsWith(process.argv[1])) { * random {boolean} - Randomize page order for debugging when true * patient {boolean} - Wait longer and retry more times for rate-limited external URLS * bail {boolean} - Throw an error on the first page (not permalink) that has >0 flaws + * externalServerErrorsAsWarning {boolean} - Treat >=500 errors or temporary request errors as warning * */ async function main(core, octokit, uploadArtifact, opts = {}) { @@ -583,6 +592,7 @@ async function processPermalink(core, permalink, page, pageMap, redirects, opts, checkExternalLinks, verbose, patient, + externalServerErrorsAsWarning, } = opts const html = await renderInnerHTML(page, permalink) const $ = cheerio.load(html) @@ -613,6 +623,7 @@ async function processPermalink(core, permalink, page, pageMap, redirects, opts, pageMap, checkAnchors, checkExternalLinks, + externalServerErrorsAsWarning, { verbose, patient }, db ) @@ -758,6 +769,7 @@ async function checkHrefLink( pageMap, checkAnchors = false, checkExternalLinks = false, + externalServerErrorsAsWarning = false, { verbose = false, patient = false } = {}, db = null ) { @@ -815,11 +827,39 @@ async function checkHrefLink( } const { ok, ...info } = await checkExternalURLCached(core, href, { verbose, patient }, db) if (!ok) { - return { CRITICAL: `Broken external link (${JSON.stringify(info)})`, isExternal: true } + // By default, an not-OK problem with an external link is CRITICAL + // but if it was a `responseError` or the statusCode was >= 500 + // then downgrade it to WARNING. + let problem = 'CRITICAL' + if (externalServerErrorsAsWarning) { + if ( + (info.statusCode && info.statusCode >= 500) || + (info.requestError && isTemporaryRequestError(info.requestError)) + ) { + problem = 'WARNING' + } + } + return { [problem]: `Broken external link (${JSON.stringify(info)})`, isExternal: true } } } } +// Return true if the request error is sufficiently temporary. For example, +// a request to `https://exammmmple.org` will fail with `ENOTFOUND` because +// the DNS entry doesn't exist. It means it won't have much hope if you +// simply try again later. +// However, an `ETIMEDOUT` means it could work but it didn't this time but +// might if we try again a different hour or day. +function isTemporaryRequestError(requestError) { + if (typeof requestError === 'string') { + // See https://betterstack.com/community/guides/scaling-nodejs/nodejs-errors/ + // for a definition of each one. + const errorEnums = ['ECONNRESET', 'ECONNREFUSED', 'ETIMEDOUT', 'ECONNABORTED'] + return errorEnums.some((enum_) => requestError.includes(enum_)) + } + return false +} + // Can't do this memoization within the checkExternalURL because it can // return a Promise since it already collates multiple URLs under the // same cache key. @@ -844,7 +884,10 @@ async function checkExternalURLCached(core, href, { verbose, patient }, db) { } } - const result = await checkExternalURL(core, href, { verbose, patient }) + const result = await checkExternalURL(core, href, { + verbose, + patient, + }) if (cacheMaxAge) { // By only cache storing successful results, we give the system a chance diff --git a/.github/workflows/link-check-daily.yml b/.github/workflows/link-check-daily.yml index be9aabf207..7a55af00d5 100644 --- a/.github/workflows/link-check-daily.yml +++ b/.github/workflows/link-check-daily.yml @@ -77,6 +77,9 @@ jobs: # But mind you that the number has a 10% chance of "jitter" # to avoid a stampeding herd when they all expire some day. EXTERNAL_LINK_CHECKER_MAX_AGE_DAYS: 7 + # If we're unable to connect or the server returns a 50x error, + # treat it as a warning and not as a broken link. + EXTERNAL_SERVER_ERRORS_AS_WARNINGS: true timeout-minutes: 30 run: node .github/actions/rendered-content-link-checker.js diff --git a/lib/excluded-links.js b/lib/excluded-links.js index 65d3281956..0f80af68d3 100644 --- a/lib/excluded-links.js +++ b/lib/excluded-links.js @@ -47,5 +47,4 @@ export default [ 'https://developer.android.com/studio', 'https://lastpass.com/', 'https://lastpass.com/auth/', - 'https://jqplay.org', ] diff --git a/script/rendered-content-link-checker.js b/script/rendered-content-link-checker.js index afa54806c1..7f76384cb8 100755 --- a/script/rendered-content-link-checker.js +++ b/script/rendered-content-link-checker.js @@ -69,6 +69,7 @@ program .option('--bail', 'Exit on the first possible flaw') .option('--verbose-url ', 'Print the absolute URL if set') .option('--fail-on-flaw', 'Throw error on link flaws (default: false)') + .option('--external-server-errors-as-warning', 'Treat server errors as warning (default: false)') .option('--max ', 'integer argument (default: none)', (value) => { const parsed = parseInt(value, 10) if (isNaN(parsed)) {