75
.github/workflows/check-broken-links-github-github.yml
vendored
Normal file
75
.github/workflows/check-broken-links-github-github.yml
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
name: Check Broken Docs Links in github/github

# **What it does**: This checks for any broken docs.github.com links in github/github
# **Why we have it**: Make sure all docs in github/github are up to date
# **Who does it impact**: Docs engineering, people on GitHub

on:
  workflow_dispatch:
  schedule:
    - cron: '20 13 * * 1' # run every Monday at 1:20PM UTC

# **IMPORTANT:** Do not change the FREEZE environment variable set here!
# This workflow runs on a recurring basis. To temporarily disable it (e.g.,
# during a docs deployment freeze), add an Actions Secret to the repo settings
# called `FREEZE` with a value of `true`. To re-enable workflow, simply
# delete that Secret from the repo settings. The environment variable here
# will duplicate that Secret's value for later evaluation.
env:
  FREEZE: ${{ secrets.FREEZE }}

jobs:
  check_github_github_links:
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    env:
      # need to use a token from a user with access to github/github for this step
      GITHUB_TOKEN: ${{ secrets.DOCS_BOT_FR }}
      FIRST_RESPONDER_PROJECT: Docs content first responder
      REPORT_AUTHOR: docubot
      REPORT_LABEL: github github broken link report
      REPORT_REPOSITORY: github/docs-content
    steps:
      - if: ${{ env.FREEZE == 'true' }}
        run: |
          echo 'The repo is currently frozen! Exiting this workflow.'
          exit 1 # prevents further steps from running

      - name: Checkout
        uses: actions/checkout@5a4ac9002d0be2fb38bd78e4b4dbde5606d7042f

      - name: Setup Node
        uses: actions/setup-node@38d90ce44d5275ad62cc48384b3d8a58c500bb5f
        with:
          node-version: 16.8.x
          cache: npm

      - name: Install Node.js dependencies
        run: npm ci

      - name: Run broken github/github link check
        run: |
          script/check-github-github-links.js > broken_github_github_links.md

      # check-github-github-links.js returns 0 if no links are broken, and 1 if any links
      # are broken. When an Actions step's exit code is 1, the action run's job status
      # is failure and the run ends. The following steps create an issue for the
      # broken link report only if any links are broken, so `if: ${{ failure() }}`
      # ensures the steps run despite the previous step's failure of the job.
      #
      # https://docs.github.com/actions/reference/context-and-expression-syntax-for-github-actions#job-status-check-functions

      - if: ${{ failure() }}
        name: Get title for issue
        id: check
        # NOTE: `::set-output` is deprecated; write to the $GITHUB_OUTPUT file instead.
        run: echo "title=$(head -1 broken_github_github_links.md)" >> "$GITHUB_OUTPUT"
      - if: ${{ failure() }}
        name: Create issue from file
        id: github-github-broken-link-report
        uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
        with:
          token: ${{ env.GITHUB_TOKEN }}
          title: ${{ steps.check.outputs.title }}
          content-filepath: ./broken_github_github_links.md
          repository: ${{ env.REPORT_REPOSITORY }}
          labels: ${{ env.REPORT_LABEL }}
|
||||
2
.github/workflows/repo-sync.yml
vendored
2
.github/workflows/repo-sync.yml
vendored
@@ -18,7 +18,7 @@ name: Repo Sync
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '10,25,40,55 * * * *' # every 15 minutes
|
||||
- cron: '10,40 * * * *' # every 30 minutes
|
||||
|
||||
jobs:
|
||||
close-invalid-repo-sync:
|
||||
|
||||
141
script/check-github-github-links.js
Executable file
141
script/check-github-github-links.js
Executable file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// [start-readme]
|
||||
//
|
||||
// Run this script to get all broken docs.github.com links in github/github
|
||||
//
|
||||
// [end-readme]
|
||||
|
||||
import { getContents, getPathsWithMatchingStrings } from './helpers/git-utils.js'
|
||||
import got from 'got'
|
||||
|
||||
// Bail out early when the required token is missing; every GitHub API call
// made by this script needs it.
const { GITHUB_TOKEN } = process.env
if (!GITHUB_TOKEN) {
  console.error('Error! You must have a GITHUB_TOKEN set in an .env file to run this script.')
  process.exit(1)
}

main()
|
||||
|
||||
// Find every docs.github.com link referenced from github/github, request each
// one, and print a Markdown report of the broken ones. Exits 0 when all links
// resolve, 1 when any link is broken (the workflow keys off that exit code).
async function main() {
  // Strings whose presence in a file marks it as a candidate for containing docs links.
  const searchStrings = ['https://docs.github.com', 'GitHub help_url', 'GitHub developer_help_url']
  const foundFiles = await getPathsWithMatchingStrings(searchStrings, 'github', 'github')
  const searchFiles = [...foundFiles]
    .filter((file) => endsWithAny(['.rb', '.yml', '.yaml', '.txt', '.pdf', '.erb', '.js'], file))
    .filter(
      (file) =>
        !file.includes('test/') &&
        !file.includes('app/views/') &&
        !file.includes('config.') &&
        !file.includes('app/api/description/')
    )

  // Pairs of [url, file path] collected from the candidate files.
  const docsLinksFiles = []
  const urlRegEx =
    /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&//=]*)/g

  for (const file of searchFiles) {
    const contents = await getContents('github', 'github', 'master', file)

    if (
      contents.includes('https://docs.github.com') ||
      contents.includes('GitHub.help_url') ||
      contents.includes('GitHub.developer_help_url')
    ) {
      const docsIndices = getIndicesOf('https://docs.github.com', contents)
      const helpIndices = getIndicesOf('GitHub.help_url', contents)
      helpIndices.push(...getIndicesOf('GitHub.developer_help_url', contents))
      if (docsIndices.length > 0) {
        docsIndices.forEach((numIndex) => {
          // Assuming we don't have links close to 500 characters long
          const docsLink = contents.substring(numIndex, numIndex + 500).match(urlRegEx)
          // FIX: .match() returns null when the regex finds nothing (e.g. the
          // index was inside a string the URL regex rejects); guard before [0].
          if (docsLink) {
            docsLinksFiles.push([docsLink[0].toString().replace(/[^a-zA-Z0-9]*$|\\n$/g, ''), file])
          }
        })
      }

      if (helpIndices.length > 0) {
        helpIndices.forEach((numIndex) => {
          // There are certain links like #{GitHub.help_url}#{learn_more_path} and #{GitHub.developer_help_url}#{learn_more_path} that we should skip
          if (
            (contents.substring(numIndex, numIndex + 11) === 'GitHub.help' &&
              contents.charAt(numIndex + 16) !== '#') ||
            (contents.substring(numIndex, numIndex + 16) === 'GitHub.developer' &&
              contents.charAt(numIndex + 26) !== '#')
          ) {
            const startSearchIndex = contents.indexOf('/', numIndex)
            // Looking for the closest '/' after GitHub.developer_help_url or GitHub.help_url
            // There are certain links that don't start with `/` so we want to skip those.
            // If there's no `/` within 30 characters of GitHub.help_url/GitHub.developer_help_url, skip
            if (startSearchIndex - numIndex < 30) {
              const helpLink =
                'https://docs.github.com' +
                contents
                  .substring(
                    startSearchIndex,
                    regexIndexOf(
                      contents,
                      /\n|"\)|{@email_tracking_params}|\^http|Ahttps|example|This|TODO"|[{}|"%><.,')* ]/,
                      startSearchIndex + 1
                    )
                  )
                  .trim()
              docsLinksFiles.push([helpLink, file])
            }
          }
        })
      }
    }
  }
  // Request every collected URL in parallel; any request that throws marks the
  // link as broken.
  const brokenLinks = []
  await Promise.all(
    docsLinksFiles.map(async (file) => {
      try {
        await got(file[0])
      } catch {
        brokenLinks.push(file)
      }
    })
  )
  if (!brokenLinks.length) {
    console.log('All links are good!')
    process.exit(0)
  }

  console.log(`Found ${brokenLinks.length} total broken links in github/github`)
  console.log('```')

  console.log(`${JSON.stringify([...brokenLinks], null, 2)}`)

  console.log('```')
  // Exit unsuccessfully if broken links are found.
  process.exit(1)
}
|
||||
|
||||
// True when `string` ends with at least one of the given suffixes.
function endsWithAny(suffixes, string) {
  return suffixes.some((suffix) => string.endsWith(suffix))
}
|
||||
|
||||
// Collect every starting index at which `searchString` occurs in `string`,
// scanning left to right without overlaps. An empty search string yields [].
function getIndicesOf(searchString, string) {
  const indices = []
  if (searchString.length === 0) return indices

  for (
    let index = string.indexOf(searchString);
    index > -1;
    index = string.indexOf(searchString, index + searchString.length)
  ) {
    indices.push(index)
  }

  return indices
}
|
||||
|
||||
// Like String#indexOf, but with a RegExp: returns the index (into the whole
// string) of the first match at or after `startPos` (default 0), or -1.
function regexIndexOf(string, regex, startPos) {
  const offset = startPos || 0
  const relativeIndex = string.substring(offset).search(regex)
  return relativeIndex >= 0 ? relativeIndex + offset : relativeIndex
}
|
||||
@@ -126,3 +126,46 @@ export async function createIssueComment(owner, repo, pullNumber, body) {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
|
||||
// Search for each string in `strArr` within the given repo's files and return
// a Set of paths to files that contain at least one of the strings.
// (Note: returns a Set, not an array — callers spread it as needed.)
export async function getPathsWithMatchingStrings(strArr, org, repo) {
  const perPage = 100
  const paths = new Set()

  for (const str of strArr) {
    try {
      const q = `q=${str}+in:file+repo:${org}/${repo}`
      let currentPage = 1
      let totalCount = 0
      let currentCount = 0

      do {
        const data = await searchCode(q, perPage, currentPage)
        // forEach, not map: we only want the side effect of adding to the Set.
        data.items.forEach((el) => paths.add(el.path))
        totalCount = data.total_count
        currentCount += data.items.length
        currentPage++
        // FIX: the code-search API caps the results it will actually return
        // (1,000 per query), so `currentCount` may never reach `total_count`.
        // Stop as soon as a page comes back empty to avoid looping forever.
        if (data.items.length === 0) break
      } while (currentCount < totalCount)
    } catch (err) {
      console.log(`error searching for ${str} in ${org}/${repo}`)
      throw err
    }
  }

  return paths
}
|
||||
|
||||
// Run one page of a GitHub code search and return the raw response data.
// `q` is the full search query, `perPage` the page size, `currentPage` the
// 1-based page number. Rethrows API errors after logging the failed query.
async function searchCode(q, perPage, currentPage) {
  try {
    const response = await github.rest.search.code({
      q,
      per_page: perPage,
      page: currentPage,
    })
    return response.data
  } catch (err) {
    console.log(`error searching for ${q} in code`)
    throw err
  }
}
|
||||
|
||||
@@ -13,7 +13,7 @@ const MAX_CONCURRENT_REQUESTS = 50
|
||||
jest.useFakeTimers()
|
||||
|
||||
describe('developer redirects', () => {
|
||||
jest.setTimeout(3 * 60 * 1000)
|
||||
jest.setTimeout(4 * 60 * 1000)
|
||||
|
||||
beforeAll(async () => {
|
||||
// The first page load takes a long time so let's get it out of the way in
|
||||
|
||||
Reference in New Issue
Block a user