From 522099e0ba5a220f0a4778fbbf0d79d59fb8cab9 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Wed, 19 Apr 2023 15:24:58 -0400 Subject: [PATCH 1/2] Separate failure from broken link check itself (#36488) --- .../check-broken-links-github-github.yml | 32 ++++++----- script/check-github-github-links.js | 57 ++++++++++++++----- script/helpers/git-utils.js | 35 +++++++++++- 3 files changed, 94 insertions(+), 30 deletions(-) diff --git a/.github/workflows/check-broken-links-github-github.yml b/.github/workflows/check-broken-links-github-github.yml index 0e839543d4..4abd458f95 100644 --- a/.github/workflows/check-broken-links-github-github.yml +++ b/.github/workflows/check-broken-links-github-github.yml @@ -57,6 +57,7 @@ jobs: env: NODE_ENV: production PORT: 4000 + ENABLED_LANGUAGES: en run: | node server.js & @@ -65,22 +66,18 @@ jobs: - name: Run broken github/github link check run: | - script/check-github-github-links.js > broken_github_github_links.md + script/check-github-github-links.js broken_github_github_links.md - # check-github-github-links.js returns 0 if no links are broken, and 1 if any links - # are broken. When an Actions step's exit code is 1, the action run's job status - # is failure and the run ends. The following steps create an issue for the - # broken link report only if any links are broken, so `if: ${{ failure() }}` - # ensures the steps run despite the previous step's failure of the job. - # - # https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions - - - if: ${{ failure() && env.FREEZE != 'true' }} - name: Get title for issue + - name: Get title for issue + # If the file 'broken_github_github_links.md' got created, + # the hash of it will not be an empty string. That means if found + # broken links, we want to create an issue. + if: ${{ hashFiles('broken_github_github_links.md') != '' && env.FREEZE != 'true' }} id: check run: echo "title=$(head -1 broken_github_github_links.md)" >> $GITHUB_OUTPUT - - if: ${{ failure() && env.FREEZE != 'true'}} - name: Create issue from file + + - name: Create issue from file + if: ${{ hashFiles('broken_github_github_links.md') != '' && env.FREEZE != 'true' }} id: github-github-broken-link-report uses: peter-evans/create-issue-from-file@433e51abf769039ee20ba1293a088ca19d573b7f with: @@ -89,3 +86,12 @@ jobs: content-filepath: ./broken_github_github_links.md repository: ${{ env.REPORT_REPOSITORY }} labels: ${{ env.REPORT_LABEL }} + + - name: Send Slack notification if workflow fails + uses: someimportantcompany/github-actions-slack-message@1d367080235edfa53df415bd8e0bbab480f29bad + if: ${{ failure() && env.FREEZE != 'true' }} + with: + channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }} + bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }} + color: failure + text: The last "Check Broken Docs Links in github/github" run for ${{github.repository}} failed. See https://github.com/${{github.repository}}/actions/workflows/check-broken-links-github-github.yml diff --git a/script/check-github-github-links.js b/script/check-github-github-links.js index e49f729ab1..1c24b484bf 100755 --- a/script/check-github-github-links.js +++ b/script/check-github-github-links.js @@ -4,11 +4,18 @@ // // Run this script to get all broken docs.github.com links in github/github // +// To run this locally, you'll generate a PAT and create an environment +// variable called GITHUB_TOKEN. +// Easiest is to create a *classic* Personal Access Token and make sure +// it has all "repo" scopes. You also have to press the "Configure SSO" +// for it. +// // [end-readme] import fs from 'fs/promises' import got, { RequestError } from 'got' +import { program } from 'commander' import { getContents, getPathsWithMatchingStrings } from './helpers/git-utils.js' @@ -19,8 +26,15 @@ if (!process.env.GITHUB_TOKEN) { const FORCE_DOWNLOAD = Boolean(JSON.parse(process.env.FORCE_DOWNLOAD || 'false')) const BATCH_SIZE = JSON.parse(process.env.BATCH_SIZE || '10') const BASE_URL = process.env.BASE_URL || 'http://localhost:4000' +const CACHE_SEARCHES = !JSON.parse(process.env.CI || 'false') -main() +program + .description('Check for broken links in github/github') + .option('--check', 'Exit non-zero if there were >0 broken links') + .argument('[output-file]', 'If omitted or "-", will write to stdout') + .parse(process.argv) + +main(program.opts(), program.args) // The way `got` does retries: // @@ -46,7 +60,12 @@ const timeoutConfiguration = { request: 3000, } -async function main() { +async function main(opts, args) { + const { check } = opts + let outputFile = null + if (args && args.length > 0 && args[0] !== '-') { + outputFile = args[0] + } const searchStrings = ['https://docs.github.com', 'GitHub help_url', 'GitHub developer_help_url'] const foundFiles = [] @@ -58,7 +77,12 @@ async function main() { } } if (!foundFiles.length || FORCE_DOWNLOAD) { - foundFiles.push(...(await getPathsWithMatchingStrings(searchStrings, 'github', 'github'))) + foundFiles.push( + ...(await getPathsWithMatchingStrings(searchStrings, 'github', 'github', { + cache: CACHE_SEARCHES, + forceDownload: FORCE_DOWNLOAD, + })) + ) await fs.writeFile('/tmp/foundFiles.json', JSON.stringify(foundFiles, undefined, 2), 'utf-8') } const searchFiles = [...new Set(foundFiles)] // filters out dupes @@ -164,7 +188,7 @@ async function main() { // fail in quite a nice way and not "blame got". const url = new URL(BASE_URL + linkPath) try { - await got(url.href, { + await got.head(url.href, { retry: retryConfiguration, timeout: timeoutConfiguration, }) @@ -182,17 +206,22 @@ async function main() { if (!brokenLinks.length) { console.log('All links are good!') - process.exit(0) + } else { + let markdown = `Found ${brokenLinks.length} total broken links in github/github` + markdown += '\n\n```\n' + markdown += JSON.stringify([...brokenLinks], null, 2) + markdown += '\n```\n' + if (outputFile) { + await fs.writeFile(outputFile, markdown, 'utf-8') + console.log(`Wrote Markdown about broken files to ${outputFile}`) + } else { + console.log(markdown) + } + + if (check) { + process.exit(brokenLinks.length) + } } - - console.log(`Found ${brokenLinks.length} total broken links in github/github`) - console.log('```') - - console.log(`${JSON.stringify([...brokenLinks], null, 2)}`) - - console.log('```') - // Exit unsuccessfully if broken links are found. - process.exit(1) } function endsWithAny(suffixes, string) { diff --git a/script/helpers/git-utils.js b/script/helpers/git-utils.js index ffc9c39bbf..af33182605 100644 --- a/script/helpers/git-utils.js +++ b/script/helpers/git-utils.js @@ -1,4 +1,7 @@ #!/usr/bin/env node +import crypto from 'crypto' +import fs from 'fs/promises' + import Github from './github.js' const github = Github() @@ -134,7 +137,12 @@ export async function createIssueComment(owner, repo, pullNumber, body) { } // Search for a string in a file in code and return the array of paths to files that contain string -export async function getPathsWithMatchingStrings(strArr, org, repo) { +export async function getPathsWithMatchingStrings( + strArr, + org, + repo, + { cache = true, forceDownload = false } = {} +) { const perPage = 100 const paths = new Set() @@ -146,7 +154,7 @@ export async function getPathsWithMatchingStrings(strArr, org, repo) { let currentCount = 0 do { - const data = await searchCode(q, perPage, currentPage) + const data = await searchCode(q, perPage, currentPage, cache, forceDownload) data.items.map((el) => paths.add(el.path)) totalCount = data.total_count currentCount += data.items.length @@ -161,13 +169,34 @@ export async function getPathsWithMatchingStrings(strArr, org, repo) { return paths } -async function searchCode(q, perPage, currentPage) { +async function searchCode(q, perPage, currentPage, cache = true, forceDownload = false) { + const cacheKey = `searchCode-${q}-${perPage}-${currentPage}` + const tempFilename = `/tmp/searchCode-${crypto + .createHash('md5') + .update(cacheKey) + .digest('hex')}.json` + + if (!forceDownload && cache) { + try { + return JSON.parse(await fs.readFile(tempFilename, 'utf8')) + } catch (error) { + if (error.code !== 'ENOENT') { + throw error + } + console.log(`Cache miss on ${tempFilename} (${cacheKey})`) + } + } + try { const { data } = await secondaryRateLimitRetry(github.rest.search.code, { q, per_page: perPage, page: currentPage, }) + if (cache) { + await fs.writeFile(tempFilename, JSON.stringify(data)) + console.log(`Wrote search results to ${tempFilename}`) + } return data } catch (err) { From fe363e94f944f425a70913294fd2b815c42aa784 Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Wed, 19 Apr 2023 15:27:56 -0400 Subject: [PATCH 2/2] force fix for bad Markdown tables in translations (#36495) --- lib/page-data.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lib/page-data.js b/lib/page-data.js index f07634b007..7afdf9694a 100644 --- a/lib/page-data.js +++ b/lib/page-data.js @@ -158,6 +158,19 @@ async function translateTree(dir, langObj, enTree) { content = content.replaceAll('[AUTOTITLE"을]', '[AUTOTITLE]') content = content.replaceAll('["AUTOTITLE]', '"[AUTOTITLE]') + // The page content/code-security/secret-scanning/secret-scanning-patterns.md + // uses some intricate tables in Markdown where exact linebreaks can + // cause the page to render incorrectly. Instead of becoming a ``, + // it becomes a massive `

` tag. + // Ideally, we should have a better solution that doesn't require such + // "sensitive" Markdown but for now, this change is important so the + // Markdown-to-HTML rendering doesn't become totally broken. + // See internal issue #2984 + content = content.replaceAll( + '{%- for entry in secretScanningData %} |', + '{%- for entry in secretScanningData %}\n|' + ) + // The "content" isn't a frontmatter key translatedData.markdown = content