
Merge pull request #10457 from github/repo-sync

repo sync
Authored by Octomerger Bot on 2021-10-01 13:59:08 -04:00, committed by GitHub
5 changed files with 261 additions and 2 deletions

View File

@@ -0,0 +1,75 @@
name: Check Broken Docs Links in github/github

# **What it does**: This checks for any broken docs.github.com links in github/github
# **Why we have it**: Make sure all docs in github/github are up to date
# **Who does it impact**: Docs engineering, people on GitHub

on:
  workflow_dispatch:
  schedule:
    - cron: '20 13 * * 1' # run every Monday at 1:20PM UTC

# **IMPORTANT:** Do not change the FREEZE environment variable set here!
# This workflow runs on a recurring basis. To temporarily disable it (e.g.,
# during a docs deployment freeze), add an Actions Secret to the repo settings
# called `FREEZE` with a value of `true`. To re-enable workflow, simply
# delete that Secret from the repo settings. The environment variable here
# will duplicate that Secret's value for later evaluation.
env:
  FREEZE: ${{ secrets.FREEZE }}

jobs:
  check_github_github_links:
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    env:
      # need to use a token from a user with access to github/github for this step
      GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
      FIRST_RESPONDER_PROJECT: Docs content first responder
      REPORT_AUTHOR: docubot
      REPORT_LABEL: github github broken link report
      REPORT_REPOSITORY: github/docs-content
    steps:
      - if: ${{ env.FREEZE == 'true' }}
        run: |
          echo 'The repo is currently frozen! Exiting this workflow.'
          exit 1 # prevents further steps from running
      - name: Checkout
        uses: actions/checkout@5a4ac9002d0be2fb38bd78e4b4dbde5606d7042f
      - name: Setup Node
        uses: actions/setup-node@38d90ce44d5275ad62cc48384b3d8a58c500bb5f
        with:
          node-version: 16.8.x
          cache: npm
      - name: Install Node.js dependencies
        run: npm ci
      - name: Run broken github/github link check
        run: |
          script/check-github-github-links.js > broken_github_github_links.md
      # check-github-github-links.js returns 0 if no links are broken, and 1 if any links
      # are broken. When an Actions step's exit code is 1, the action run's job status
      # is failure and the run ends. The following steps create an issue for the
      # broken link report only if any links are broken, so `if: ${{ failure() }}`
      # ensures the steps run despite the previous step's failure of the job.
      #
      # https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions
      - if: ${{ failure() }}
        name: Get title for issue
        id: check
        run: echo "::set-output name=title::$(head -1 broken_github_github_links.md)"
      - if: ${{ failure() }}
        name: Create issue from file
        id: github-github-broken-link-report
        uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
        with:
          token: ${{ env.GITHUB_TOKEN }}
          title: ${{ steps.check.outputs.title }}
          content-filepath: ./broken_github_github_links.md
          repository: ${{ env.REPORT_REPOSITORY }}
          labels: ${{ env.REPORT_LABEL }}

View File

@@ -18,7 +18,7 @@ name: Repo Sync
on:
  workflow_dispatch:
  schedule:
-   - cron: '10,25,40,55 * * * *' # every 15 minutes
+   - cron: '10,40 * * * *' # every 30 minutes

jobs:
  close-invalid-repo-sync:

View File: script/check-github-github-links.js

@@ -0,0 +1,141 @@
#!/usr/bin/env node

// [start-readme]
//
// Run this script to get all broken docs.github.com links in github/github
//
// [end-readme]

import { getContents, getPathsWithMatchingStrings } from './helpers/git-utils.js'
import got from 'got'

if (!process.env.GITHUB_TOKEN) {
  console.error('Error! You must have a GITHUB_TOKEN set in an .env file to run this script.')
  process.exit(1)
}

main()

async function main() {
  const searchStrings = ['https://docs.github.com', 'GitHub help_url', 'GitHub developer_help_url']
  const foundFiles = await getPathsWithMatchingStrings(searchStrings, 'github', 'github')
  const searchFiles = [...foundFiles]
    .filter((file) => endsWithAny(['.rb', '.yml', '.yaml', '.txt', '.pdf', '.erb', '.js'], file))
    .filter(
      (file) =>
        !file.includes('test/') &&
        !file.includes('app/views/') &&
        !file.includes('config.') &&
        !file.includes('app/api/description/')
    )
  const docsLinksFiles = []
  const urlRegEx =
    /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g

  for (const file of searchFiles) {
    const contents = await getContents('github', 'github', 'master', file)

    if (
      contents.includes('https://docs.github.com') ||
      contents.includes('GitHub.help_url') ||
      contents.includes('GitHub.developer_help_url')
    ) {
      const docsIndices = getIndicesOf('https://docs.github.com', contents)
      const helpIndices = getIndicesOf('GitHub.help_url', contents)
      helpIndices.push(...getIndicesOf('GitHub.developer_help_url', contents))
      if (docsIndices.length > 0) {
        docsIndices.forEach((numIndex) => {
          // Assuming we don't have links close to 500 characters long
          const docsLink = contents.substring(numIndex, numIndex + 500).match(urlRegEx)
          docsLinksFiles.push([docsLink[0].toString().replace(/[^a-zA-Z0-9]*$|\\n$/g, ''), file])
        })
      }
      if (helpIndices.length > 0) {
        helpIndices.forEach((numIndex) => {
          // There are certain links like #{GitHub.help_url}#{learn_more_path} and #{GitHub.developer_help_url}#{learn_more_path} that we should skip
          if (
            (contents.substring(numIndex, numIndex + 11) === 'GitHub.help' &&
              contents.charAt(numIndex + 16) !== '#') ||
            (contents.substring(numIndex, numIndex + 16) === 'GitHub.developer' &&
              contents.charAt(numIndex + 26) !== '#')
          ) {
            const startSearchIndex = contents.indexOf('/', numIndex)
            // Looking for the closest '/' after GitHub.developer_help_url or GitHub.help_url
            // There are certain links that don't start with `/` so we want to skip those.
            // If there's no `/` within 30 characters of GitHub.help_url/GitHub.developer_help_url, skip
            if (startSearchIndex - numIndex < 30) {
              const helpLink =
                'https://docs.github.com' +
                contents
                  .substring(
                    startSearchIndex,
                    regexIndexOf(
                      contents,
                      /\n|"\)|{@email_tracking_params}|\^http|Ahttps|example|This|TODO"|[{}|"%><.,')* ]/,
                      startSearchIndex + 1
                    )
                  )
                  .trim()

              docsLinksFiles.push([helpLink, file])
            }
          }
        })
      }
    }
  }

  const brokenLinks = []

  await Promise.all(
    docsLinksFiles.map(async (file) => {
      try {
        await got(file[0])
      } catch {
        brokenLinks.push(file)
      }
    })
  )

  if (!brokenLinks.length) {
    console.log('All links are good!')
    process.exit(0)
  }

  console.log(`Found ${brokenLinks.length} total broken links in github/github`)
  console.log('```')
  console.log(`${JSON.stringify([...brokenLinks], null, 2)}`)
  console.log('```')

  // Exit unsuccessfully if broken links are found.
  process.exit(1)
}

function endsWithAny(suffixes, string) {
  for (const suffix of suffixes) {
    if (string.endsWith(suffix)) return true
  }
  return false
}

function getIndicesOf(searchString, string) {
  const searchStrLen = searchString.length
  if (searchStrLen === 0) return []
  let startIndex = 0
  let index
  const indices = []
  while ((index = string.indexOf(searchString, startIndex)) > -1) {
    indices.push(index)
    startIndex = index + searchStrLen
  }
  return indices
}

function regexIndexOf(string, regex, startPos) {
  const indexOf = string.substring(startPos || 0).search(regex)
  return indexOf >= 0 ? indexOf + (startPos || 0) : indexOf
}
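
Note: the three functions at the bottom of this script are generic string helpers. A minimal illustration of what getIndicesOf and regexIndexOf return, using a made-up sample string rather than real github/github content:

// Illustrative only: exercises the helpers defined in the script above.
const sample = 'abc GitHub.help_url abc GitHub.help_url'

// Every starting offset of the literal search string:
getIndicesOf('GitHub.help_url', sample) // => [4, 24]

// Like String.prototype.indexOf, but with a regular expression and an optional
// start position; main() uses it to find where an extracted link ends:
regexIndexOf(sample, /_/, 0) // => 15
regexIndexOf(sample, /_/, 16) // => 35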

View File: script/helpers/git-utils.js

@@ -126,3 +126,46 @@ export async function createIssueComment(owner, repo, pullNumber, body) {
    throw err
  }
}

// Search for a string in a file in code and return the array of paths to files that contain string
export async function getPathsWithMatchingStrings(strArr, org, repo) {
  const perPage = 100
  const paths = new Set()

  for (const str of strArr) {
    try {
      const q = `q=${str}+in:file+repo:${org}/${repo}`
      let currentPage = 1
      let totalCount = 0
      let currentCount = 0

      do {
        const data = await searchCode(q, perPage, currentPage)
        data.items.map((el) => paths.add(el.path))
        totalCount = data.total_count
        currentCount += data.items.length
        currentPage++
      } while (currentCount < totalCount)
    } catch (err) {
      console.log(`error searching for ${str} in ${org}/${repo}`)
      throw err
    }
  }

  return paths
}

async function searchCode(q, perPage, currentPage) {
  try {
    const { data } = await github.rest.search.code({
      q,
      per_page: perPage,
      page: currentPage,
    })
    return data
  } catch (err) {
    console.log(`error searching for ${q} in code`)
    throw err
  }
}
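
Note: the new getPathsWithMatchingStrings helper is consumed by script/check-github-github-links.js, added above. A minimal usage sketch mirroring that script's call, assuming a GITHUB_TOKEN with code-search access to the target repo:

import { getPathsWithMatchingStrings } from './helpers/git-utils.js'

// Returns a Set of file paths in github/github whose contents match any search
// string, paging through code-search results 100 at a time.
const paths = await getPathsWithMatchingStrings(['https://docs.github.com'], 'github', 'github')
console.log(paths.size)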

View File

@@ -13,7 +13,7 @@ const MAX_CONCURRENT_REQUESTS = 50
jest.useFakeTimers()

describe('developer redirects', () => {
-  jest.setTimeout(3 * 60 * 1000)
+  jest.setTimeout(4 * 60 * 1000)

  beforeAll(async () => {
    // The first page load takes a long time so let's get it out of the way in