import fs from 'fs'
import path from 'path'

import { visit } from 'unist-util-visit'
import { fromMarkdown } from 'mdast-util-from-markdown'
import { toMarkdown } from 'mdast-util-to-markdown'
import yaml from 'js-yaml'

import frontmatter from './read-frontmatter.js'
import {
  getPathWithLanguage,
  getPathWithoutLanguage,
  getPathWithoutVersion,
  getVersionStringFromPath,
} from './path-utils.js'
import loadRedirects from './redirects/precompile.js'
import patterns from './patterns.js'
import { loadUnversionedTree, loadPages, loadPageMap } from './page-data.js'
import getRedirect, { splitPathByLanguage } from './get-redirect.js'
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
import { deprecated } from './enterprise-server-releases.js'

/**
 * Deep-copy a plain data object.
 *
 * @param {object} obj - The data to copy.
 * @returns {object} A copy that shares no nested references with `obj`.
 */
function objectClone(obj) {
  try {
    return structuredClone(obj)
  } catch {
    // Need to polyfill for Node 16 folks.
    // Using `yaml.load(yaml.dump(...))` is safe enough because this
    // data itself came from the Yaml deserializing in frontmatter().
    return yaml.load(yaml.dump(obj))
  }
}

// That magical string that can be turned into the actual title when
// we, at runtime, render out the links
const AUTOTITLE = 'AUTOTITLE'

// Default values for the options accepted by `updateInternalLinks()`.
const Options = {
  setAutotitle: false,
  fixHref: false,
  verbose: false,
  strict: false,
}

/**
 * Compute improved internal links for each of the given files.
 *
 * Nothing is written to disk here; for every file the suggested new content
 * and frontmatter data is returned.
 *
 * @param {Iterable<string>} files - Paths of the files to process.
 * @param {object} [options] - Overrides for the defaults in `Options`
 *   (`setAutotitle`, `fixHref`, `verbose`, `strict`).
 * @returns {Promise<object[]>} One entry per file: `{ file }` merged with
 *   whatever `updateFile()` returned for it.
 * @throws Rethrows any error raised while processing a file, after logging
 *   which file was being processed.
 */
export async function updateInternalLinks(files, options = {}) {
  const opts = Object.assign({}, Options, options)

  const results = []

  const unversionedTree = await loadUnversionedTree(['en'])
  const pageList = await loadPages(unversionedTree, ['en'])
  const pageMap = await loadPageMap(pageList)
  const redirects = await loadRedirects(pageList)

  const context = {
    pages: pageMap,
    redirects,
    currentLanguage: 'en',
    userLanguage: 'en',
  }

  for (const file of files) {
    try {
      results.push({
        file,
        ...(await updateFile(file, context, opts)),
      })
    } catch (err) {
      console.warn(`The file it tried to process on exception was: ${file}`)
      throw err
    }
  }

  return results
}

/**
 * Read one file and work out which internal links (in the frontmatter and in
 * the Markdown body) can be improved.
 *
 * @param {string} file - Path of the `.md` or `.yml` file to read.
 * @param {object} context - Holds `pages`, `redirects` and the language keys
 *   used by the redirect lookup.
 * @param {object} opts - The merged options (see `Options`).
 * @returns {Promise<object>} `{ data, content, rawContent, newContent,
 *   replacements, warnings, newData }` where `newContent`/`newData` carry the
 *   suggested changes and `replacements`/`warnings` describe them.
 */
async function updateFile(file, context, opts) {
  const rawContent = fs.readFileSync(file, 'utf8')
  const { data, content } = frontmatter(rawContent)

  // Since this function can process both `.md` and `.yml` files,
  // when treating a `.md` file, the `data` from `frontmatter(rawContent)`
  // is easy. But when dealing with a file like `data/learning-tracks/foo.yml`
  // then the `frontmatter(rawContent).data` always becomes `{}`.
  // And since the Yaml file might contain arrays of internal linked
  // pathnames, we have to re-read it fully.
  if (file.endsWith('.yml')) {
    Object.assign(data, yaml.load(content))
  }

  let newContent = content
  const ast = fromMarkdown(newContent)
  const replacements = []
  const warnings = []

  // The day we know with confidence that everyone is on Node >=17,
  // we can change this to use `structuredClone` without the polyfill
  // technique.
  const newData = objectClone(data)

  const ANY = Symbol('any')
  const IS_ARRAY = Symbol('is array')

  // This configuration determines which nested things to bother looking
  // into.
  const HAS_LINKS = {
    featuredLinks: ['gettingStarted', 'startHere', 'guideCards', 'popular'],
    introLinks: ANY,
    includeGuides: IS_ARRAY,
  }

  const fileParts = file.split(path.sep)
  if (fileParts.includes('data') && fileParts.includes('learning-tracks') && file.endsWith('.yml')) {
    // data/learning-tracks/**/*.yml files are different because the keys
    // are arbitrary but what they might all have in common is a key
    // there called `guides`
    for (const key of Object.keys(data)) {
      HAS_LINKS[key] = ['guides']
    }
  }

  // Whether the list stored under `name` should be inspected, given the
  // `seek` rule configured for its frontmatter key.
  const shouldInspect = (seek, name) =>
    (Array.isArray(seek) && seek.includes(name)) || seek === IS_ARRAY || seek === ANY

  for (const [key, seek] of Object.entries(HAS_LINKS)) {
    if (!(key in data)) {
      continue
    }
    try {
      const value = data[key]
      if (Array.isArray(value)) {
        if (shouldInspect(seek, key)) {
          const better = getNewFrontmatterLinkList(value, context, opts, file, rawContent)
          if (!equalArray(better, value)) {
            newData[key] = better
          }
        }
      } else if (value && typeof value === 'object') {
        // Only real objects are traversed. An empty Yaml value deserializes
        // to `null`, for which `Object.entries()` would throw.
        for (const [group, thing] of Object.entries(value)) {
          if (Array.isArray(thing) && shouldInspect(seek, group)) {
            const better = getNewFrontmatterLinkList(thing, context, opts, file, rawContent)
            if (!equalArray(better, thing)) {
              newData[key][group] = better
            }
          }
        }
      }
    } catch (error) {
      // When in strict mode, if it throws an error that stacktrace will
      // bubble up to the CLI. And the CLI will mention which file it
      // was processing when it failed. But we have a valuable piece of
      // information here about which frontmatter key it was that failed.
      console.warn(`The frontmatter key it processed and failed was '${key}'`)
      throw error
    }
  }

  // The AST positions are relative to `content`, so count how many lines
  // precede `content` inside `rawContent` to report lines in the raw file.
  const lineOffset = rawContent.replace(content, '').split(/\n/g).length - 1

  visit(ast, definitionMatcher, (node) => {
    const asMarkdown = toMarkdown(node).trim()
    // E.g. `[foo]: /bar`
    if (content.includes(asMarkdown)) {
      if (opts.fixHref) {
        let newHref = node.url
        const { label } = node
        const betterHref = getNewHref(newHref, context, opts, file)
        // getNewHref() might return a deliberate `undefined` if the
        // new href value could not be computed for some reason.
        if (betterHref !== undefined) {
          newHref = betterHref
        }
        // NOTE(review): only the label and href are re-serialized here.
        // Confirm whether definitions with an optional title occur.
        const newAsMarkdown = `[${label}]: ${newHref}`
        if (asMarkdown !== newAsMarkdown) {
          // Something can be improved!
          const column = node.position.start.column
          const line = node.position.start.line + lineOffset
          replacements.push({
            asMarkdown,
            newAsMarkdown,
            line,
            column,
          })
          // Use a function replacer so that any `$` in the new text is
          // inserted literally rather than as a replacement pattern.
          newContent = newContent.replace(asMarkdown, () => newAsMarkdown)
        }
      }
    }
  })

  visit(ast, linkMatcher, (node) => {
    const asMarkdown = toMarkdown(node).trim()
    if (content.includes(asMarkdown)) {
      // The title part of the link might be more Markdown.
      // For example...
      //
      //    [This *is* cool](/articles/link)
      //
      // In that case, for this link node, the title is the combined
      // serialization of `node.children`. But `toMarkdown()` always appends
      // `\n` to the serialized output.
      // Now the title, of the above-mentioned example becomes 'This *is* cool'
      // which is unlikely to attempt to be the document's title, that
      // it links to.
      const title = node.children.map((child) => toMarkdown(child).slice(0, -1)).join('')

      let newTitle = title
      let newHref = node.url

      const hasQuotesAroundLink = content.includes(`"${asMarkdown}`)

      if (opts.setAutotitle) {
        if (hasQuotesAroundLink) {
          /**
           * Note! A lot of internal links are bullet points like:
           *
           *   - [Creating a repository](/articles/create-a-repo)
           *   - [Forking a repository](/articles/fork-a-repo)
           * or
           *   1. [Set your username in Git](/github/getting-started-with-github/setting-your-username-in-git).
           *   1. [Set your commit email address in Git](/articles/setting-your-commit-email-address).
           *
           * Perhaps we could recognize them as such and consider them
           * matches anyway. In particular if the title is made up of
           * a leading capital letter and most of the rest in lower case.
           */
          if (title !== AUTOTITLE) {
            newTitle = AUTOTITLE
          }
        } else {
          /**
           * The Markdown link sometimes is written like this:
           *
           *   ["This is the title](/foo/bar)."
           *
           * or...
           *
           *   ["This is the title"](/foo/bar).
           */
          if (node.children && node.children.length > 0 && node.children[0].value) {
            if (singleStartingQuote(node.children[0].value)) {
              const column = node.position.start.column
              const line = node.position.start.line + lineOffset
              warnings.push({
                warning: 'Starts with a single " inside the text',
                asMarkdown,
                line,
                column,
              })
            } else if (isSimpleQuote(node.children[0].value)) {
              const column = node.position.start.column
              const line = node.position.start.line + lineOffset
              warnings.push({
                warning: 'Starts and ends with a " inside the text',
                asMarkdown,
                line,
                column,
              })
            }
          }
        }
      }

      if (opts.fixHref) {
        const betterHref = getNewHref(node.url, context, opts, file)
        // getNewHref() might return a deliberate `undefined` if the
        // new href value could not be computed for some reason.
        if (betterHref !== undefined) {
          newHref = betterHref
        }
      }

      const newAsMarkdown = `[${newTitle}](${newHref})`
      if (asMarkdown !== newAsMarkdown) {
        // Something can be improved!
        const column = node.position.start.column
        const line = node.position.start.line + lineOffset
        replacements.push({
          asMarkdown,
          newAsMarkdown,
          line,
          column,
        })
        // Use a function replacer so that a `$` in the link text or href
        // (e.g. `$$` or `$&`) is inserted literally.
        newContent = newContent.replace(asMarkdown, () => newAsMarkdown)
      }
    } else if (opts.verbose) {
      console.warn(
        `Unable to find link as Markdown ('${asMarkdown}') in the source content (${file})`
      )
    }
  })

  return {
    data,
    content,
    rawContent,
    newContent,
    replacements,
    warnings,
    newData,
  }
}

/**
 * Node test for `visit()`: matches `definition` nodes whose URL starts
 * with `/`.
 *
 * @param {object} node - An mdast node.
 * @returns {boolean}
 */
function definitionMatcher(node) {
  const { type, url } = node
  if (type === 'definition' && url) {
    return url.startsWith('/')
  }
  return false
}

/**
 * Node test for `visit()`: matches `link` nodes whose URL starts with `/`
 * or `./`, minus the exceptions listed in the body.
 *
 * @param {object} node - An mdast node.
 * @returns {boolean}
 */
function linkMatcher(node) {
  if (node.type === 'link' && node.url) {
    const { url } = node
    if (url.startsWith('/') || url.startsWith('./')) {
      // Sometimes there's a link to view the asset as a separate link.
      // Skip these because they don't link to an actual Page.
      if (url.startsWith('/assets') || url.startsWith('/public/')) {
        return false
      }

      // If a link uses Liquid we can't process it. It would require full
      // rendering which this script is not doing.
      if (url.includes('{{') || url.includes('{%')) {
        return false
      }

      // Sometimes we link to archived enterprise-server versions. These
      // can never be updated because although they appear to be internal,
      // they are, in a sense external. For example:
      // See "[This old thing](/enterprise-server@3.1/some/page)".
      // Skip these
      const version = getVersionStringFromPath(url)
      if (
        version &&
        version.startsWith('enterprise-server@') &&
        deprecated.includes(version.replace('enterprise-server@', ''))
      ) {
        return false
      }

      // Really old versions like `/enterprise/2.1` don't need to be
      // corrected because they're deliberately pointing to archived
      // versions.
      if (patterns.getEnterpriseVersionNumber.test(url)) {
        return false
      }

      return true
    }
  }
  return false
}

/**
 * Return a copy of a frontmatter link list in which entries that redirect
 * somewhere else have been replaced with their destination.
 *
 * @param {string[]} list - The frontmatter entries.
 * @param {object} context - Holds `pages` and what `getRedirect()` needs.
 * @param {object} opts - The merged options; `strict` turns broken links
 *   into thrown errors instead of warnings.
 * @param {string} file - Path of the file, used in messages only.
 * @param {string} rawContent - The raw file content, used to look up the
 *   line number for messages.
 * @returns {string[]} A new list of the same length as `list`.
 * @throws {Error} In strict mode, when an entry is neither a known page nor
 *   a redirect. Also whatever `stripLiquid()` throws.
 */
function getNewFrontmatterLinkList(list, context, opts, file, rawContent) {
  /**
   * The `list` is expected to all be strings. Sometimes they're like this:
   *
   *   /search-github/searching-on-github/searching-for-repositories
   *
   * Sometimes they're like this:
   *
   *   {% ifversion fpt or ghec or ghes > 3.4 %}/pages/getting-started-with-github-pages{% endif %}
   *
   * In the case of Liquid, we have to temporarily remove it to be able to
   * test the path as a URL.
   **/

  const better = []
  for (const entry of list) {
    if (/{%\s*else\s*%}/.test(entry)) {
      console.warn(`Skipping frontmatter link with {% else %} in it: ${entry}. (file: ${file})`)
      better.push(entry)
      continue
    }

    const pure = stripLiquid(entry)
    let asURL = '/en'
    if (!pure.startsWith('/')) {
      asURL += '/'
    }
    asURL += pure
    if (asURL in context.pages) {
      better.push(entry)
    } else {
      const redirected = getRedirect(asURL, context)
      if (redirected === undefined) {
        const lineNumber = findLineNumber(entry, rawContent)
        const msg =
          'A frontmatter link appears to be broken. ' +
          `Neither redirect or a findable page: ${pure}. (file: ${file} line: ${
            lineNumber || 'unknown'
          })`
        if (opts.strict) {
          throw new Error(msg)
        }
        console.warn(`WARNING: ${msg}`)
        better.push(entry)
      } else {
        // Perhaps it just redirected to a specific version
        const redirectedWithoutLanguage = getPathWithoutLanguage(redirected)
        const asURLWithoutVersion = getPathWithoutVersion(redirectedWithoutLanguage)
        if (asURLWithoutVersion === pure) {
          better.push(entry)
        } else {
          // Function replacer: keeps any `$` in the new path literal.
          better.push(entry.replace(pure, () => asURLWithoutVersion))
        }
      }
    }
  }
  return better
}

// Try to return the line in the raw content that entry was on.
// It's hard to know exactly because the `entry` is the result of parsing
// the YAML, most likely, from the frontmatter.
// Returns the 1-based line number, or `null` if no line matched.
function findLineNumber(entry, rawContent) {
  let number = 0
  for (const line of rawContent.split(/\n/g)) {
    number++
    if (line.endsWith(entry) && line.includes(` ${entry}`)) {
      return number
    }
  }
  return null
}

const liquidStartRex = /^{%-?\s*ifversion .+?\s*%}/
const liquidEndRex = /{%-?\s*endif\s*-?%}$/

// Return
//
//   /foo/bar
//
// if the text input was
//
//   {% ifversion ghes%}/foo/bar{%endif %}
//
// And if no liquid, just return as is.
// Throws if the text contains a `{` in any other shape.
function stripLiquid(text) {
  if (liquidStartRex.test(text) && liquidEndRex.test(text)) {
    return text.replace(liquidStartRex, '').replace(liquidEndRex, '').trim()
  } else if (text.includes('{')) {
    throw new Error(`Unsupported Liquid in frontmatter link list (${text})`)
  }
  return text
}

// True if both arrays have the same length and strictly equal items at
// every index.
function equalArray(arr1, arr2) {
  return arr1.length === arr2.length && arr1.every((item, i) => item === arr2[i])
}

/**
 * Work out what an internal href should be rewritten to.
 *
 * @param {string} href - The href as written in the content.
 * @param {object} context - Holds `currentLanguage`, `pages` and what
 *   `getRedirect()` needs.
 * @param {object} opts - The merged options; `strict` turns problems into
 *   thrown errors instead of warnings.
 * @param {string} file - Path of the file, used in messages only.
 * @returns {string|undefined} The new href (with the original query string
 *   and hash re-appended), or `undefined` when, in non-strict mode, no new
 *   value could be computed.
 * @throws {Error} In strict mode, for hardcoded languages, apparently broken
 *   links and unversioned `/enterprise-server/` links.
 */
function getNewHref(href, context, opts, file) {
  const { currentLanguage } = context
  const parsed = new URL(href, 'https://docs.github.com')
  const hash = parsed.hash
  const search = parsed.search
  const pure = parsed.pathname
  let newHref = pure.replace(patterns.trailingSlash, '$1')

  // Before testing if it redirects takes it somewhere, we temporarily
  // pretend it's already prefixed for English (/en)
  const [language, withoutLanguage] = splitPathByLanguage(newHref, currentLanguage)
  if (withoutLanguage !== newHref) {
    // It means the link already had a language in it
    const msg = `Unable to cope with internal links with hardcoded language '${newHref}' (file: ${file})`
    if (opts.strict) {
      throw new Error(msg)
    } else {
      console.warn(`WARNING: ${msg}`)
      return
    }
  }
  const newHrefWithLanguage = getPathWithLanguage(withoutLanguage, language)
  const redirected = getRedirect(newHrefWithLanguage, context)

  // If it comes back as `undefined` it means it didn't need to be
  // redirected, specifically.
  // Optionally, we could skip this whole step of checking for completely
  // broken internal links because other tools will later check that.
  if (redirected === undefined) {
    if (!context.pages[newHrefWithLanguage]) {
      // If this happens, it's very possible that it's a broken link
      const msg = `A link appears to be broken. Neither redirect or a findable page '${href}' (${file})`
      if (opts.strict) {
        throw new Error(msg)
      } else {
        console.warn(`WARNING: ${msg}`)
        return
      }
    }
  }

  if (redirected) {
    // The getRedirect() function will produce a final URL that the user
    // can use, but that means it also injects the language in there.
    // For updating the content statically, we don't want that.
    // Note: It could be an idea to somehow tell getRedirect() to not
    // bother but perhaps it adds unnecessary complexity to a function that
    // has to work perfectly for runtime.
    const redirectedWithoutLanguage = getPathWithoutLanguage(redirected)
    // Some paths can't be viewed in free-pro-team so the getRedirect()
    // function will inject the version that you're supposed to go to.
    // For example `/enterprise/admin/guides/installation/configuring-a-hostname`
    // redirects to `/enterprise-server@3.7/admin/configuration/configuring-...`
    // (at the time of writing) which is good when you're actually clicking
    // the link but not good when we're trying to update the source
    // content.
    // The `getPathWithoutVersion` function doesn't change the input if
    // the URL passed doesn't appear to have a valid version in it already.
    // I.e. `getPathWithoutVersion('/get-started') === '/get-started'`
    // but `getPathWithoutVersion('/enterprise-server@3.8/get-started') === '/get-started'`

    // But hang on, in some rare cases the content deliberately linked to
    // a specific version. If that's the case, leave it like that.

    // There's another exception! Some links have the `/free-pro-team@latest/`
    // prefix. The `getRedirect()` will always remove that. If that's the case
    // we always want to respect that and put it back in.
    if (withoutLanguage.includes(`/${nonEnterpriseDefaultVersion}/`)) {
      newHref = `/${nonEnterpriseDefaultVersion}${redirectedWithoutLanguage}`
    } else if (withoutLanguage.startsWith('/enterprise-server/')) {
      const msg =
        "Old /enterprise-server/ links that don't include a @version is no longer supported. " +
        'If you see this, manually fix that link to use enterprise-server@latest.'
      if (opts.strict) {
        throw new Error(msg)
      } else {
        console.warn(msg)
        return
      }
    } else if (withoutLanguage.startsWith('/enterprise-server@latest')) {
      // getRedirect() will always replace `enterprise-server@latest` with
      // whatever the latest number is. E.g. `enterprise-server@3.9`.
      // But we have to "undo" that.
      newHref = `/enterprise-server@latest${getPathWithoutVersion(redirectedWithoutLanguage)}`
    } else if (getPathWithoutVersion(withoutLanguage) !== withoutLanguage) {
      newHref = redirectedWithoutLanguage
    } else {
      newHref = getPathWithoutVersion(redirectedWithoutLanguage)
    }
  }

  if (search) {
    newHref += search
  }
  if (hash) {
    newHref += hash
  }

  return newHref
}

// True if the text starts with a `"` and that is the only `"` in it.
function singleStartingQuote(text) {
  return text.startsWith('"') && text.split('"').length === 2
}

// True if the text starts and ends with a `"` and contains no other `"`.
function isSimpleQuote(text) {
  return text.startsWith('"') && text.endsWith('"') && text.split('"').length === 3
}