1
0
mirror of synced 2025-12-20 10:28:40 -05:00
Files
docs/script/update-internal-links.js
Sarah Schneider fc18a1b6ea fix the regex
2021-03-02 15:11:31 -05:00

198 lines
7.5 KiB
JavaScript
Executable File

#!/usr/bin/env node
const fs = require('fs')
const walk = require('walk-sync')
const path = require('path')
const astFromMarkdown = require('mdast-util-from-markdown')
const visit = require('unist-util-visit')
const { loadPages, loadPageMap } = require('../lib/pages')
const loadSiteData = require('../lib/site-data')
const loadRedirects = require('../lib/redirects/precompile')
const { getPathWithoutLanguage, getPathWithoutVersion } = require('../lib/path-utils')
const allVersions = Object.keys(require('../lib/all-versions'))
const frontmatter = require('../lib/read-frontmatter')
const renderContent = require('../lib/render-content')
const patterns = require('../lib/patterns')
// Lists the files under `pathToWalk` (resolved against the current working directory) whose
// path ends in `.md`, leaving out paths that end in `README.md` and paths that contain
// `/early-access/`.
const walkFiles = (pathToWalk) => {
  const rootDir = path.join(process.cwd(), pathToWalk)
  const everyFile = walk(rootDir, { includeBasePath: true, directories: false })

  const isWantedMarkdown = (filePath) => {
    if (!filePath.endsWith('.md')) return false
    if (filePath.endsWith('README.md')) return false
    // ignore EA for now
    return !filePath.includes('/early-access/')
  }

  return everyFile.filter(isWantedMarkdown)
}
// Every Markdown file the script processes: the walkFiles() results for the `content`
// directory followed by those for the `data` directory.
const allFiles = walkFiles('content').concat(walkFiles('data'))
// Maps the node type of a link's first child (when that child wraps the link text) to the
// Markdown delimiter used to re-wrap the text when the link is rebuilt as a string.
// main() logs an error and exits with status 1 if it finds any markup not represented here.
// Hacky but it captures the current rare edge cases.
const linkInlineMarkup = {
emphasis: '*',
strong: '**'
}
// Matches every occurrence of the spaced form `/enterprise/{{ currentVersion }}`; main()
// replaces each match with the space-free string below before parsing the Markdown.
const currentVersionWithSpacesRegex = /\/enterprise\/{{ currentVersion }}/g
// Replacement text for the regex above (same path, no spaces inside the braces).
const currentVersionWithoutSpaces = '/enterprise/{{currentVersion}}'
// [start-readme]
//
// Run this script to find internal links in all content and data Markdown files, check if either the title or link
// (or both) are outdated, and automatically update them if so.
//
// Exceptions:
// * Links with fragments (e.g., [Bar](/foo#bar)) will get their root links updated if necessary, but the fragment
// and title will be unchanged (e.g., [Bar](/noo#bar)).
// * Links with hardcoded versions (e.g., [Foo](/enterprise-server/baz)) will get their root links updated if
// necessary, but the hardcoded versions will be preserved (e.g., [Foo](/enterprise-server/qux)).
// * Links with Liquid in the titles will have their root links updated if necessary, but the titles will be preserved.
//
// [end-readme]
// Entry point. main() is async, so a failure anywhere inside it surfaces as a rejected promise;
// report it and exit non-zero rather than leaving the rejection unhandled.
main().catch(error => {
  console.error(error)
  process.exit(1)
})

/**
 * Rewrites outdated internal links in every file listed in `allFiles`.
 *
 * For each file: parse the frontmatter, normalize `/enterprise/{{ currentVersion }}` so the
 * Markdown parser sees those links, collect the internal link nodes, resolve each link against
 * the page map (directly or through a redirect) and replace the Markdown link in the file content
 * when its title or path changed. Every file is written back to disk.
 *
 * Exits the process with status 1 when a link's inline markup is not listed in
 * `linkInlineMarkup`, or when a link cannot be resolved to a page for any version.
 *
 * @returns {Promise<void>}
 */
async function main () {
  console.log('Working...')
  const pageList = await loadPages()
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)
  const site = await loadSiteData()
  // Rendering context; `currentVersion` is filled in per version further down.
  const context = {
    pages: pageMap,
    redirects,
    site: site.en.site,
    currentLanguage: 'en'
  }

  for (const file of allFiles) {
    const { data, content } = frontmatter(fs.readFileSync(file, 'utf8'))

    // Do a blanket find-replace for /enterprise/{{ currentVersion }}/ to /enterprise/{{currentVersion}}/
    // so that the AST parser recognizes the link as a link node. The spaces prevent it from doing so.
    let newContent = content.replace(currentVersionWithSpacesRegex, currentVersionWithoutSpaces)

    const ast = astFromMarkdown(newContent)

    // We can't do async functions within visit, so gather the nodes upfront
    const nodesPerFile = []
    visit(ast, node => {
      if (node.type !== 'link') return
      // Only root-relative links are candidates; static asset paths and the bare root are skipped.
      if (!node.url.startsWith('/')) return
      if (node.url.startsWith('/assets')) return
      if (node.url.startsWith('/public')) return
      // NOTE(review): these look like hardcoded legacy version segments — confirm before changing.
      if (node.url.includes('/11.10.340/')) return
      if (node.url.includes('/2.1/')) return
      if (node.url === '/') return
      nodesPerFile.push(node)
    })

    // For every Markdown link...
    for (const node of nodesPerFile) {
      const oldLink = node.url
      const firstChild = node.children[0]

      // Find and preserve any inline markup in link titles, like [*Foo*](/foo)
      let inlineMarkup = ''
      if (firstChild.children) {
        inlineMarkup = linkInlineMarkup[firstChild.type]
        if (!inlineMarkup) {
          console.error(`Cannot find an inline markup entry for ${firstChild.type}!`)
          process.exit(1)
        }
      }

      // Plain text lives on the child itself; wrapped text lives one level deeper.
      const oldTitle = firstChild.value || firstChild.children[0].value
      // The link is re-serialized from the AST; this string is what gets searched for in the file.
      const oldMarkdownLink = `[${inlineMarkup}${oldTitle}${inlineMarkup}](${oldLink})`

      // As a blanket rule, only update titles in links that begin with quotes. (Many links
      // have punctuation before the closing quotes, so we'll only check for opening quotes.)
      // Update: "[Foo](/foo)
      // Do not update: [Bar](/bar)
      const hasQuotesAroundLink = newContent.includes(`"${oldMarkdownLink}`)

      // We need to preserve fragments and hardcoded versions if any are found. Both depend only
      // on the original link, so they are computed once rather than once per version.
      const fragmentMatch = oldLink.match(/(#.*$)/)
      const versionMatch = oldLink.match(/(enterprise-server(?:@.[^\/]*?)?)\//)

      // Run through all supported versions...
      let foundPage
      for (const version of allVersions) {
        context.currentVersion = version

        // Render the link for each version using the renderContent pipeline, which includes the rewrite-local-links plugin.
        const $ = await renderContent(oldMarkdownLink, context, { cheerioObject: true })
        const renderedHref = $('a').attr('href')
        // No anchor/href came out of the render for this version; try the next one instead of
        // crashing on `undefined.replace`.
        if (!renderedHref) continue

        // Remove the fragment (and any trailing slash) for the lookup.
        const linkToCheck = renderedHref
          .replace(/#.*$/, '')
          .replace(patterns.trailingSlash, '$1')

        // Try to find the rendered link in the set of pages!
        foundPage = findPage(linkToCheck, pageMap, redirects)

        // Once a page is found for a particular version, exit immediately; we don't need to check the other versions
        // because all we care about is the page title and path.
        if (foundPage) break
      }

      if (!foundPage) {
        console.error(`Can't find link in pageMap! ${oldLink} in ${file.replace(process.cwd(), '')}`)
        process.exit(1)
      }

      // If the original link includes a fragment OR the original title includes Liquid OR the link is
      // not preceded by a quote, do not change the title; otherwise, use the found page title. (We don't
      // want to update the title if a fragment is found because the title likely points to the fragment
      // section header, not the page title.)
      const keepOldTitle = fragmentMatch || oldTitle.includes('{%') || !hasQuotesAroundLink
      const newTitle = keepOldTitle ? oldTitle : foundPage.title

      // If the original link includes a fragment, append it to the found page path.
      // Also remove the language code because Markdown links don't include language codes.
      let newLink = getPathWithoutLanguage(fragmentMatch ? foundPage.path + fragmentMatch[1] : foundPage.path)

      // If the original link includes a hardcoded version, preserve it; otherwise, remove versioning
      // because Markdown links don't include versioning.
      newLink = versionMatch ? `/${versionMatch[1]}${getPathWithoutVersion(newLink)}` : getPathWithoutVersion(newLink)

      let newMarkdownLink = `[${inlineMarkup}${newTitle}${inlineMarkup}](${newLink})`

      // Handle a few misplaced quotation marks: when the link text itself starts with a quote,
      // the rebuilt link is prefixed with one as well.
      if (oldMarkdownLink.includes('["')) {
        newMarkdownLink = `"${newMarkdownLink}`
      }

      if (oldMarkdownLink !== newMarkdownLink) {
        // Stream the results to console as we find them.
        console.log('old link', oldMarkdownLink)
        console.log('new link', newMarkdownLink)
        console.log('-------')

        // Use a replacer function so that `$` sequences in the new link (e.g. `$&`, `$$`) are
        // inserted literally instead of being interpreted as replacement patterns.
        newContent = newContent.replace(oldMarkdownLink, () => newMarkdownLink)
      }
    }

    fs.writeFileSync(file, frontmatter.stringify(newContent, data, { lineWidth: 10000 }))
  }

  console.log('Done!')
}
/**
 * Looks up a page by path, first directly and then through the redirects table.
 *
 * @param {string} tryPath - Path to look up.
 * @param {Object} pageMap - Object keyed by path whose values carry a `title`.
 * @param {Object} redirects - Object mapping an old path to the path it redirects to.
 * @returns {{title: *, path: string}|undefined} The matched page's title and the path it was
 *   found under, or `undefined` when neither lookup hits.
 */
function findPage (tryPath, pageMap, redirects) {
  // A direct hit wins over any redirect registered for the same path.
  const directHit = pageMap[tryPath]
  if (directHit) {
    return { title: directHit.title, path: tryPath }
  }

  const redirectTarget = redirects[tryPath]
  const redirectedPage = pageMap[redirectTarget]
  return redirectedPage
    ? { title: redirectedPage.title, path: redirectTarget }
    : undefined
}