563 lines
19 KiB
JavaScript
563 lines
19 KiB
JavaScript
import fs from 'fs'
|
|
import path from 'path'
|
|
|
|
import { visit } from 'unist-util-visit'
|
|
import { fromMarkdown } from 'mdast-util-from-markdown'
|
|
import { toMarkdown } from 'mdast-util-to-markdown'
|
|
import yaml from 'js-yaml'
|
|
|
|
import frontmatter from './read-frontmatter.js'
|
|
import {
|
|
getPathWithLanguage,
|
|
getPathWithoutLanguage,
|
|
getPathWithoutVersion,
|
|
getVersionStringFromPath,
|
|
} from './path-utils.js'
|
|
import loadRedirects from './redirects/precompile.js'
|
|
import patterns from './patterns.js'
|
|
import { loadUnversionedTree, loadPages, loadPageMap } from './page-data.js'
|
|
import getRedirect, { splitPathByLanguage } from './get-redirect.js'
|
|
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
|
|
import { deprecated } from './enterprise-server-releases.js'
|
|
|
|
/**
 * Return a deep copy of `obj`.
 *
 * The built-in `structuredClone()` is tried first. If that call throws for
 * any reason (for example because the global does not exist on Node 16),
 * the copy is made by round-tripping the value through YAML instead.
 *
 * @param {*} obj - The value to copy.
 * @returns {*} A copy of `obj`.
 */
function objectClone(obj) {
  let copy
  try {
    copy = structuredClone(obj)
  } catch {
    // Need to polyfill for Node 16 folks.
    // A `yaml.load(yaml.dump(...))` round trip is safe enough because this
    // data itself came from the YAML deserializing in frontmatter().
    copy = yaml.load(yaml.dump(obj))
  }
  return copy
}
|
|
|
|
// The magical string that can be turned into the actual title when
// we, at runtime, render out the links.
const AUTOTITLE = 'AUTOTITLE'

// Default values for the options accepted by `updateInternalLinks()`.
// Anything the caller passes in overrides these.
const Options = {
  // Replace the text of quoted links with AUTOTITLE (and warn about
  // suspicious quotes inside the link text otherwise).
  setAutotitle: false,
  // Run link hrefs through getNewHref() to follow redirects.
  fixHref: false,
  // Warn when a link can't be found verbatim in the source content.
  verbose: false,
  // Throw, instead of warn, when a link appears to be broken.
  strict: false,
}
|
|
|
|
/**
 * Work out the internal-link updates for each of the given files.
 *
 * The English page tree, page list, page map and redirects are loaded once
 * and shared, as a context object, by every file. Files are processed one
 * at a time, in the order given.
 *
 * @param {string[]} files - Paths of the files to process.
 * @param {object} [options] - Overrides for the defaults in `Options`.
 * @returns {Promise<object[]>} One entry per file: the `file` path merged
 *   with whatever `updateFile()` returned for it.
 * @throws Re-throws any error from `updateFile()`, after logging which file
 *   was being processed.
 */
export async function updateInternalLinks(files, options = {}) {
  const opts = { ...Options, ...options }

  const unversionedTree = await loadUnversionedTree(['en'])
  const pageList = await loadPages(unversionedTree, ['en'])
  const pages = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)

  const context = {
    pages,
    redirects,
    currentLanguage: 'en',
    userLanguage: 'en',
  }

  const results = []
  for (const file of files) {
    let outcome
    try {
      outcome = await updateFile(file, context, opts)
    } catch (err) {
      // The stack trace alone doesn't say which file was the culprit.
      console.warn(`The file it tried to process on exception was: ${file}`)
      throw err
    }
    results.push({ file, ...outcome })
  }

  return results
}
|
|
|
|
/**
 * Read one file and compute the improvements that can be made to its
 * internal links, both in the frontmatter link lists and in the links and
 * link definitions of the Markdown body.
 *
 * Nothing is written back to disk here; everything is returned to the caller.
 *
 * @param {string} file - Path to a `.md` or `.yml` file.
 * @param {object} context - Lookup data that is handed on to
 *   `getNewFrontmatterLinkList()` and `getNewHref()`.
 * @param {object} opts - Options. `setAutotitle`, `fixHref` and `verbose` are
 *   read here; the whole object is also handed on to the helpers.
 * @returns {Promise<object>} An object with `data`, `content`, `rawContent`,
 *   `newContent`, `replacements`, `warnings` and `newData`.
 * @throws Re-throws any error raised while processing a frontmatter key,
 *   after logging which key it was.
 */
async function updateFile(file, context, opts) {
  const rawContent = fs.readFileSync(file, 'utf8')
  const { data, content } = frontmatter(rawContent)

  // Since this function can process both `.md` and `.yml` files,
  // when treating a `.md` file, the `data` from `frontmatter(rawContent)`
  // is easy. But when dealing with a file like `data/learning-tracks/foo.yml`
  // then the `frontmatter(rawContent).data` always becomes `{}`.
  // And since the Yaml file might contain arrays of internal linked
  // pathnames, we have to re-read it fully.
  if (file.endsWith('.yml')) {
    Object.assign(data, yaml.load(content))
  }

  let newContent = content
  const ast = fromMarkdown(newContent)

  const replacements = []
  const warnings = []

  // The day we know with confidence that everyone is on Node >=17,
  // we can change this to use `structuredClone` without the polyfill
  // technique.
  const newData = objectClone(data)

  const ANY = Symbol('any')
  const IS_ARRAY = Symbol('is array')

  // This configuration determines which nested things to bother looking
  // into. The value is either a list of sub-keys, or one of the symbols.
  const HAS_LINKS = {
    featuredLinks: ['gettingStarted', 'startHere', 'guideCards', 'popular'],
    introLinks: ANY,
    includeGuides: IS_ARRAY,
  }

  const pathParts = file.split(path.sep)
  if (
    pathParts.includes('data') &&
    pathParts.includes('learning-tracks') &&
    file.endsWith('.yml')
  ) {
    // data/learning-tracks/**/*.yml files are different because the keys
    // are arbitrary but what they might all have in common is a key
    // there called `guides`
    for (const key of Object.keys(data)) {
      HAS_LINKS[key] = ['guides']
    }
  }

  for (const [key, seek] of Object.entries(HAS_LINKS)) {
    if (!(key in data)) {
      continue
    }
    // Whether the configuration says to look into something of this name.
    const isWanted = (name) =>
      (Array.isArray(seek) && seek.includes(name)) || seek === IS_ARRAY || seek === ANY
    try {
      if (Array.isArray(data[key])) {
        if (isWanted(key)) {
          const better = getNewFrontmatterLinkList(data[key], context, opts, file, rawContent)
          if (!equalArray(better, data[key])) {
            newData[key] = better
          }
        }
      } else {
        for (const [group, thing] of Object.entries(data[key])) {
          if (Array.isArray(thing) && isWanted(group)) {
            const better = getNewFrontmatterLinkList(thing, context, opts, file, rawContent)
            if (!equalArray(better, thing)) {
              newData[key][group] = better
            }
          }
        }
      }
    } catch (error) {
      // When in strict mode, if it throws an error that stacktrace will
      // bubble up to the CLI. And the CLI will mention which file it
      // was processing when it failed. But we have a valuable piece of
      // information here about which frontmatter key it was that failed.
      console.warn(`The frontmatter key it processed and failed was '${key}'`)
      throw error
    }
  }

  // The AST positions are relative to `content`. Add the number of lines
  // that precede it in the raw file (i.e. the frontmatter) to report
  // line numbers of the file itself.
  const lineOffset = rawContent.replace(content, '').split(/\n/g).length - 1

  const positionOf = (node) => ({
    line: node.position.start.line + lineOffset,
    column: node.position.start.column,
  })

  visit(ast, definitionMatcher, (node) => {
    const asMarkdown = toMarkdown(node).trim()
    // E.g. `[foo]: /bar`
    if (!content.includes(asMarkdown) || !opts.fixHref) {
      return
    }

    let newHref = node.url
    const { label } = node
    const betterHref = getNewHref(newHref, context, opts, file)
    // getNewHref() might return a deliberate `undefined` if the
    // new href value could not be computed for some reason.
    if (betterHref !== undefined) {
      newHref = betterHref
    }
    const newAsMarkdown = `[${label}]: ${newHref}`
    if (asMarkdown !== newAsMarkdown) {
      // Something can be improved!
      replacements.push({ asMarkdown, newAsMarkdown, ...positionOf(node) })
      // A replacer function is used so that any `$` sequence in the new
      // Markdown (e.g. `$&`) is inserted literally instead of being
      // expanded as a special replacement pattern.
      newContent = newContent.replace(asMarkdown, () => newAsMarkdown)
    }
  })

  visit(ast, linkMatcher, (node) => {
    const asMarkdown = toMarkdown(node).trim()
    if (!content.includes(asMarkdown)) {
      if (opts.verbose) {
        console.warn(
          `Unable to find link as Markdown ('${asMarkdown}') in the source content (${file})`
        )
      }
      return
    }

    // The title part of the link might be more Markdown.
    // For example...
    //
    //    [This *is* cool](/articles/link)
    //
    // In that case, for this link node, the title is the combined
    // serialization of `node.children`. But `toMarkdown()` always appends
    // `\n` to the serialized output.
    // Now the title, of the above-mentioned example becomes 'This *is* cool'
    // which is unlikely to be an attempt at the title of the document
    // that it links to.
    const title = node.children.map((child) => toMarkdown(child).slice(0, -1)).join('')

    let newTitle = title
    let newHref = node.url

    const hasQuotesAroundLink = content.includes(`"${asMarkdown}`)

    if (opts.setAutotitle) {
      if (hasQuotesAroundLink) {
        /**
         * Note! A lot of internal links are bullet points like:
         *
         *   - [Creating a repository](/articles/create-a-repo)
         *   - [Forking a repository](/articles/fork-a-repo)
         * or
         *   1. [Set your username in Git](/github/getting-started-with-github/setting-your-username-in-git).
         *   1. [Set your commit email address in Git](/articles/setting-your-commit-email-address).
         *
         * Perhaps we could recognize them as such and consider them
         * matches anyway. In particular if the title is made up of
         * a leading capital letter and the rest mostly in lower case.
         */
        newTitle = AUTOTITLE
      } else if (node.children && node.children.length > 0 && node.children[0].value) {
        /**
         * The Markdown link sometimes is written like this:
         *
         *     ["This is the title](/foo/bar)."
         *
         * or...
         *
         *     ["This is the title"](/foo/bar).
         */
        const firstText = node.children[0].value
        if (singleStartingQuote(firstText)) {
          warnings.push({
            warning: 'Starts with a single " inside the text',
            asMarkdown,
            ...positionOf(node),
          })
        } else if (isSimpleQuote(firstText)) {
          warnings.push({
            warning: 'Starts and ends with a " inside the text',
            asMarkdown,
            ...positionOf(node),
          })
        }
      }
    }

    if (opts.fixHref) {
      const betterHref = getNewHref(node.url, context, opts, file)
      // getNewHref() might return a deliberate `undefined` if the
      // new href value could not be computed for some reason.
      if (betterHref !== undefined) {
        newHref = betterHref
      }
    }

    const newAsMarkdown = `[${newTitle}](${newHref})`
    if (asMarkdown !== newAsMarkdown) {
      // Something can be improved!
      replacements.push({ asMarkdown, newAsMarkdown, ...positionOf(node) })
      // A replacer function is used so that any `$` sequence in the new
      // Markdown (e.g. `$&`) is inserted literally instead of being
      // expanded as a special replacement pattern.
      newContent = newContent.replace(asMarkdown, () => newAsMarkdown)
    }
  })

  return {
    data,
    content,
    rawContent,
    newContent,
    replacements,
    warnings,
    newData,
  }
}
|
|
|
|
/**
 * Test whether an AST node is a link definition (e.g. `[foo]: /bar`)
 * whose URL starts with `/`.
 *
 * @param {object} node - A Markdown AST node.
 * @returns {boolean} True only for `definition` nodes with a URL starting with `/`.
 */
function definitionMatcher(node) {
  if (node.type !== 'definition' || !node.url) {
    return false
  }
  return node.url.startsWith('/')
}
|
|
|
|
/**
 * Test whether an AST node is an internal link that this script should
 * look at.
 *
 * @param {object} node - A Markdown AST node.
 * @returns {boolean} True if the node is a `link` whose URL starts with `/`
 *   or `./` and is not excluded by one of the checks below.
 */
function linkMatcher(node) {
  const { type, url } = node
  if (type !== 'link' || !url) {
    return false
  }

  const looksInternal = url.startsWith('/') || url.startsWith('./')
  if (!looksInternal) {
    return false
  }

  // Sometimes there's a link to view the asset as a separate link.
  // Skip these because they don't link to an actual Page.
  if (url.startsWith('/assets') || url.startsWith('/public/')) {
    return false
  }

  // If a link uses Liquid we can't process it. It would require full
  // rendering which this script is not doing.
  if (url.includes('{{') || url.includes('{%')) {
    return false
  }

  // Sometimes we link to archived enterprise-server versions. These
  // can never be updated because although they appear to be internal,
  // they are, in a sense, external. For example:
  //   See "[This old thing](/enterprise-server@3.1/some/page)".
  // Skip these.
  const version = getVersionStringFromPath(url)
  const isDeprecatedServerVersion =
    version &&
    version.startsWith('enterprise-server@') &&
    deprecated.includes(version.replace('enterprise-server@', ''))
  if (isDeprecatedServerVersion) {
    return false
  }

  // Really old versions like `/enterprise/2.1` don't need to be
  // corrected because they're deliberately pointing to archived
  // versions.
  return !patterns.getEnterpriseVersionNumber.test(url)
}
|
|
|
|
/**
 * Build a new version of a frontmatter link list in which every entry that
 * redirects somewhere else has been replaced by its destination.
 *
 * The `list` is expected to all be strings. Sometimes they're like this:
 *
 *   /search-github/searching-on-github/searching-for-repositories
 *
 * Sometimes they're like this:
 *
 *   {% ifversion fpt or ghec or ghes > 3.4 %}/pages/getting-started-with-github-pages{% endif %}
 *
 * In the case of Liquid, we have to temporarily remove it to be able to
 * test the path as a URL.
 *
 * @param {string[]} list - The frontmatter entries.
 * @param {object} context - Needs `pages`; also handed to `getRedirect()`.
 * @param {object} opts - Only `strict` is read here.
 * @param {string} file - File name, used in messages only.
 * @param {string} rawContent - Raw file content, used to look up line numbers.
 * @returns {string[]} A new list of the same length as `list`.
 * @throws {Error} In strict mode, when an entry is neither a known page nor
 *   a redirect. Also whatever `stripLiquid()` throws.
 */
function getNewFrontmatterLinkList(list, context, opts, file, rawContent) {
  const hasElseTag = /{%\s*else\s*%}/
  const updated = []

  for (const entry of list) {
    if (hasElseTag.test(entry)) {
      console.warn(`Skipping frontmatter link with {% else %} in it: ${entry}. (file: ${file})`)
      updated.push(entry)
      continue
    }

    const pure = stripLiquid(entry)
    const asURL = pure.startsWith('/') ? `/en${pure}` : `/en/${pure}`

    // A known page, as is. Nothing to improve.
    if (asURL in context.pages) {
      updated.push(entry)
      continue
    }

    const redirected = getRedirect(asURL, context)
    if (redirected !== undefined) {
      // Perhaps it just redirected to a specific version
      const destination = getPathWithoutVersion(getPathWithoutLanguage(redirected))
      updated.push(destination === pure ? entry : entry.replace(pure, destination))
      continue
    }

    // Neither a page nor a redirect.
    const lineNumber = findLineNumber(entry, rawContent)
    const msg = `A frontmatter link appears to be broken. Neither redirect or a findable page: ${pure}. (file: ${file} line: ${
      lineNumber || 'unknown'
    })`
    if (opts.strict) {
      throw new Error(msg)
    }
    console.warn(`WARNING: ${msg}`)
    updated.push(entry)
  }

  return updated
}
|
|
|
|
// Try to return the line in the raw content that entry was on.
|
|
// It's hard to know exactly because the `entry` is the result of parsing
|
|
// the YAML, most likely, from the frontmatter.
|
|
/**
 * Find the 1-based number of the first line in `rawContent` that ends with
 * `entry` and has a space immediately before it.
 *
 * Lines may be terminated by `\n` or `\r\n`.
 *
 * @param {string} entry - The text to look for at the end of a line.
 * @param {string} rawContent - The full text to search.
 * @returns {number|null} The line number, or `null` if no line matches.
 */
function findLineNumber(entry, rawContent) {
  // Split on `\r?\n` so that a trailing `\r` on CRLF files doesn't
  // prevent the `endsWith()` test from ever matching.
  const lines = rawContent.split(/\r?\n/)
  const index = lines.findIndex((line) => line.endsWith(entry) && line.includes(` ${entry}`))
  return index === -1 ? null : index + 1
}
|
|
|
|
// An `{% ifversion ... %}` tag at the very start of the text.
const liquidStartRex = /^{%-?\s*ifversion .+?\s*%}/
// An `{% endif %}` tag at the very end of the text.
const liquidEndRex = /{%-?\s*endif\s*-?%}$/

/**
 * Remove a wrapping `{% ifversion ... %}` / `{% endif %}` pair from a
 * frontmatter link.
 *
 * For example, the input
 *
 *    {% ifversion ghes%}/foo/bar{%endif %}
 *
 * comes back as
 *
 *    /foo/bar
 *
 * And if there is no Liquid, the text is returned as is.
 *
 * @param {string} text - The frontmatter link entry.
 * @returns {string} The entry without the wrapping Liquid tags.
 * @throws {Error} If the text contains a `{` but isn't wrapped in the
 *   supported pair of tags.
 */
function stripLiquid(text) {
  const startsWithIfversion = liquidStartRex.test(text)
  const endsWithEndif = liquidEndRex.test(text)

  if (startsWithIfversion && endsWithEndif) {
    const withoutStart = text.replace(liquidStartRex, '')
    const withoutBoth = withoutStart.replace(liquidEndRex, '')
    return withoutBoth.trim()
  }

  if (text.includes('{')) {
    throw new Error(`Unsupported Liquid in frontmatter link list (${text})`)
  }

  return text
}
|
|
|
|
/**
 * Compare two arrays element by element using strict equality.
 *
 * @param {Array} arr1 - First array.
 * @param {Array} arr2 - Second array.
 * @returns {boolean} True if both have the same length and the same
 *   items in the same positions.
 */
function equalArray(arr1, arr2) {
  if (arr1.length !== arr2.length) {
    return false
  }
  return arr1.every((item, index) => arr2[index] === item)
}
|
|
|
|
/**
 * Compute what an internal link's href ought to be, following redirects.
 *
 * @param {string} href - The href as written in the content.
 * @param {object} context - Needs `currentLanguage` and `pages`; also handed
 *   to `getRedirect()`.
 * @param {object} opts - Only `strict` is read here.
 * @param {string} file - File name, used in messages only.
 * @returns {string|undefined} The href to use (with any query string and
 *   hash put back), or `undefined` if it could not be computed.
 * @throws {Error} In strict mode, in each of the cases that otherwise
 *   log a warning and return `undefined`.
 */
function getNewHref(href, context, opts, file) {
  const { currentLanguage } = context
  const { hash, search, pathname } = new URL(href, 'https://docs.github.com')
  let newHref = pathname.replace(patterns.trailingSlash, '$1')

  // Before testing if a redirect takes it somewhere, we temporarily
  // pretend it's already prefixed for English (/en)
  const [language, withoutLanguage] = splitPathByLanguage(newHref, currentLanguage)
  if (withoutLanguage !== newHref) {
    // It means the link already had a language in it
    const msg = `Unable to cope with internal links with hardcoded language '${newHref}' (file: ${file})`
    if (opts.strict) {
      throw new Error(msg)
    }
    console.warn(`WARNING: ${msg}`)
    return
  }

  const hrefWithLanguage = getPathWithLanguage(withoutLanguage, language)
  const redirected = getRedirect(hrefWithLanguage, context)

  // If it comes back as `undefined` it means it didn't need to be
  // redirected, specifically.
  // Optionally, we could skip this whole step of checking for completely
  // broken internal links because other tools will later check that.
  if (redirected === undefined && !context.pages[hrefWithLanguage]) {
    // If this happens, it's very possible that it's a broken link
    const msg = `A link appears to be broken. Neither redirect or a findable page '${href}' (${file})`
    if (opts.strict) {
      throw new Error(msg)
    }
    console.warn(`WARNING: ${msg}`)
    return
  }

  if (redirected) {
    // The getRedirect() function will produce a final URL that the user
    // can use, but that means it also injects the language in there.
    // For updating the content statically, we don't want that.
    // Note: It could be an idea to somehow tell getRedirect() to not
    // bother but perhaps it adds unnecessary complexity to a function that
    // has to work perfectly for runtime.
    const redirectedWithoutLanguage = getPathWithoutLanguage(redirected)

    // Some paths can't be viewed in free-pro-team so the getRedirect()
    // function will inject the version that you're supposed to go to.
    // For example `/enterprise/admin/guides/installation/configuring-a-hostname`
    // redirects to `/enterprise-server@3.7/admin/configuration/configuring-...`
    // (at the time of writing) which is good when you're actually clicking
    // the link but not good when we're trying to update the source
    // content.
    // The `getPathWithoutVersion` function doesn't change the input if
    // the URL passed doesn't appear to have a valid version in it already.
    // I.e. `getPathWithoutVersion('/get-started') === '/get-started'`
    // but `getPathWithoutVersion('/enterprise-server@3.8/get-started') === '/get-started'`
    // But hang on, in some rare cases the content deliberately linked to
    // a specific version. If that's the case, leave it like that.
    // There's another exception! Some links have the `/free-pro-team@latest/`
    // prefix. The `getRedirect()` will always remove that. If that's the case
    // we always want to respect that and put it back in.
    if (withoutLanguage.includes(`/${nonEnterpriseDefaultVersion}/`)) {
      newHref = `/${nonEnterpriseDefaultVersion}${redirectedWithoutLanguage}`
    } else if (withoutLanguage.startsWith('/enterprise-server/')) {
      const msg = `Old /enterprise-server/ links that don't include a @version is no longer supported. If you see this, manually fix that link to use enterprise-server@latest.`
      if (opts.strict) {
        throw new Error(msg)
      }
      console.warn(msg)
      return
    } else if (withoutLanguage.startsWith('/enterprise-server@latest')) {
      // getRedirect() will always replace `enterprise-server@latest` with
      // whatever the latest number is. E.g. `enterprise-server@3.9`.
      // But we have to "undo" that.
      newHref = `/enterprise-server@latest${getPathWithoutVersion(redirectedWithoutLanguage)}`
    } else if (getPathWithoutVersion(withoutLanguage) !== withoutLanguage) {
      // The original link deliberately named a version; keep the version.
      newHref = redirectedWithoutLanguage
    } else {
      newHref = getPathWithoutVersion(redirectedWithoutLanguage)
    }
  }

  // Put back whatever query string and hash the original href had.
  if (search) {
    newHref += search
  }
  if (hash) {
    newHref += hash
  }
  return newHref
}
|
|
|
|
/**
 * Test whether the text starts with a `"` and contains no other `"`.
 *
 * @param {string} text - The text to test.
 * @returns {boolean} True if the only `"` in the text is its first character.
 */
function singleStartingQuote(text) {
  if (!text.startsWith('"')) {
    return false
  }
  // No further quote anywhere after the first character.
  return text.indexOf('"', 1) === -1
}
|
|
|
|
/**
 * Test whether the text is wrapped in a pair of `"` and contains no
 * other `"` in between.
 *
 * @param {string} text - The text to test.
 * @returns {boolean} True if the text has exactly two `"`: its first and
 *   its last character.
 */
function isSimpleQuote(text) {
  const lastIndex = text.length - 1
  if (lastIndex < 1 || !text.startsWith('"') || !text.endsWith('"')) {
    return false
  }
  // The next quote after the opening one must be the closing one.
  return text.indexOf('"', 1) === lastIndex
}
|