1
0
mirror of synced 2025-12-26 14:02:45 -05:00
Files
docs/lib/page-data.js
2023-06-09 14:32:23 +00:00

389 lines
14 KiB
JavaScript

import path from 'path'
import languages from './languages.js'
import { allVersions } from './all-versions.js'
import createTree from './create-tree.js'
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
import readFileContents from './read-file-contents.js'
import Page from './page.js'
import frontmatterSchema from './frontmatter.js'
// Reads an opt-in boolean switch from the environment. The raw value is
// parsed as JSON (so `true` / `false` / `1` / `0` work) and an unset or
// empty variable counts as `false`.
const readBooleanEnvFlag = (name) => Boolean(JSON.parse(process.env[name] || 'false'))

// `export DEBUG_TRANSLATION_FALLBACKS=true` makes every fallback to English
// (caused by a translation file that fails to initialize) log a warning.
const DEBUG_TRANSLATION_FALLBACKS = readBooleanEnvFlag('DEBUG_TRANSLATION_FALLBACKS')

// `export THROW_TRANSLATION_ERRORS=true` turns those silent fallbacks into
// thrown errors instead of quietly using the English content.
const THROW_TRANSLATION_ERRORS = readBooleanEnvFlag('THROW_TRANSLATION_ERRORS')
// The keys of `allVersions`; used when building the per-version site tree.
const versions = Object.keys(allVersions)

// Thrown when reading a translated file yields no frontmatter data at all,
// so that case can share the "fall back to English" handling used for
// missing files.
class FrontmatterParsingError extends Error {}

// Names of the frontmatter properties whose schema entry is flagged
// `translatable`. Only these keys are taken from a translated file.
// Note! As of Nov 2022, the schema says that 'product' is translatable
// which is surprising since only a single page has prose in it.
const { properties: frontmatterProperties } = frontmatterSchema.schema
const translatableFrontmatterKeys = Object.keys(frontmatterProperties).filter(
  (key) => frontmatterProperties[key].translatable
)
/**
 * Build the page tree for each language, without any versioning applied.
 *
 * Pages are initialized once per language because they do not change per
 * version; this is the expensive step. The resulting nested object is the
 * basis for both the siteTree and the flat pageList.
 *
 * The English tree is always created, even when `languagesOnly` omits 'en',
 * because every translated tree is derived from it.
 *
 * @param {string[]|null} [languagesOnly=null] - Keys of `languages` to load;
 *   a falsy value loads all of them.
 * @returns {Promise<object>} Tree nodes keyed by language code.
 * @throws {Error} When `languagesOnly` is truthy but not an array.
 */
export async function loadUnversionedTree(languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }

  const englishTree = await createTree(path.join(languages.en.dir, 'content'))
  const tree = { en: englishTree }

  // Keep the requested languages, minus English which is already loaded.
  const translationLanguages = Object.entries(languages)
    .filter(([key, langObj]) => {
      const requested = !languagesOnly || languagesOnly.includes(key)
      return requested && langObj.code !== 'en'
    })
    .map(([, langObj]) => langObj)

  // All translations are derived concurrently from the same English tree.
  const translateOne = async (langObj) => {
    const contentDir = path.join(langObj.dir, 'content')
    tree[langObj.code] = await translateTree(contentDir, langObj, englishTree)
  }
  await Promise.all(translationLanguages.map(translateOne))

  return tree
}
/**
 * Recursively build the tree node for one language by overlaying a
 * translated content file on top of its English equivalent.
 *
 * For the page at `enTree.page.relativePath`, the translated file is read
 * from under `dir`. Only the frontmatter keys listed in
 * `translatableFrontmatterKeys`, plus the Markdown body, are taken from the
 * translation; every other property is shallow-copied from the English page.
 * When the translated file is missing (ENOENT) or `readFileContents()`
 * returns no `data`, the English data and Markdown are used instead.
 *
 * @param {string} dir - Base directory of the translated content; joined
 *   with each page's `relativePath`.
 * @param {object} langObj - Language entry; only `langObj.code` is read here.
 * @param {object} enTree - English tree node (`page`, and `childPages` when
 *   the page has children).
 * @returns {Promise<object>} A node `{ page }`, plus `childPages` when the
 *   new page has a truthy `children` property.
 * @throws {Error} Re-throws any read error other than ENOENT, and throws
 *   when THROW_TRANSLATION_ERRORS is on and a fallback to English was needed.
 */
async function translateTree(dir, langObj, enTree) {
const item = {}
const enPage = enTree.page
// Shallow copy of the English page's own enumerable properties.
const { ...enData } = enPage
const basePath = dir
const relativePath = enPage.relativePath
const fullPath = path.join(basePath, relativePath)
let data
let content
try {
const read = await readFileContents(fullPath)
// If it worked, great!
content = read.content
data = read.data
if (!data) {
// If the file's frontmatter Yaml is entirely broken,
// the result of `readFileContents()` is that you just
// get a `errors` key. E.g.
//
// errors: [
// {
// reason: 'invalid frontmatter entry',
// message: 'YML parsing error!',
// filepath: 'translations/ja-JP/content/get-started/index.md'
// }
// ]
//
// If this is the case, throw an error so we can lump this error with
// how we deal with the file not even being present on disk.
// NOTE(review): `read.errors` appears to be an array (see the example
// above), so the resulting Error message will not be readable. It is
// only used as a signal for the catch block below.
throw new FrontmatterParsingError(read.errors)
}
for (const { property } of read.errors) {
// If any of the errors happened on keys that are considered
// translatable, we can't accept that and have to fall back to
// English.
// For example, if a Japanese page's frontmatter lacks `title`,
// (which triggers a 'is required' error) you can't include it
// because you'd have a Page with `{title: undefined}`.
// The beauty in this is that if the translated content file
// has something wrong with, say, the `versions` frontmatter key
// we don't even care because we won't be using it anyway.
if (translatableFrontmatterKeys.includes(property)) {
const message = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English`
if (DEBUG_TRANSLATION_FALLBACKS) {
// The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
}
if (THROW_TRANSLATION_ERRORS) {
// This is thrown inside the `try`, so it passes through the catch
// block below, which re-throws it because it is neither ENOENT nor
// a FrontmatterParsingError.
throw new Error(message)
}
// Fall back to the English value for just this one key.
data[property] = enData[property]
}
}
} catch (error) {
// If it didn't work because it didn't exist, don't fret,
// we'll use the English equivalent's data and content.
if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) {
data = enData
content = enPage.markdown
// NOTE(review): this branch also handles FrontmatterParsingError, in
// which case the file does exist and the wording below is misleading.
const message = `Unable to initialize ${fullPath} because translation content file does not exist.`
if (DEBUG_TRANSLATION_FALLBACKS) {
// The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
}
if (THROW_TRANSLATION_ERRORS) {
throw new Error(message)
}
} else {
throw error
}
}
// Pick only the translatable keys out of whatever `data` ended up being
// (the translation's frontmatter, or the English copy after a fallback).
const translatedData = Object.fromEntries(
translatableFrontmatterKeys.map((key) => {
return [key, data[key]]
})
)
// The "content" isn't a frontmatter key
translatedData.markdown = correctTranslatedContentStrings(content, enPage.markdown)
item.page = new Page(
Object.assign(
{},
// By default, shallow-copy everything from the English equivalent.
enData,
// Overlay with the translations core properties.
{
basePath,
relativePath,
languageCode: langObj.code,
fullPath,
},
// And the translations translated properties.
translatedData
)
)
if (item.page.children) {
// Recurse over the English node's children; all of them are translated
// concurrently and keep the English order.
item.childPages = await Promise.all(
enTree.childPages
.filter((childTree) => {
// Translations should not get early access pages at all.
return childTree.page.relativePath.split(path.sep)[0] !== 'early-access'
})
.map((childTree) => translateTree(dir, langObj, childTree))
)
}
return item
}
/**
 * Repair common, mechanical corruptions in translated Markdown.
 *
 * Many translations contain small defects that would make rendering fail
 * and force a fallback to English. This is a temporary measure that patches
 * the "low hanging fruit" which can be recognized reliably.
 *
 * @param {string} content - The translated Markdown.
 * @param {string} englishContent - The English Markdown for the same page,
 *   used to decide where line breaks originally were.
 * @param {boolean} [debug=false] - Currently unused.
 * @returns {string} The corrected translated Markdown.
 */
export function correctTranslatedContentStrings(content, englishContent, debug = false) {
  // Corrupted AUTOTITLE links. A correction has been requested upstream,
  // but until then they are repaired here. See internal issue #2762.
  // In late 2023, check the translation repos: if these no longer occur,
  // this table can be removed. The order of the entries is significant.
  const autotitleRepairs = [
    ['[AUTOTITLE"을 참조하세요]', '[AUTOTITLE]'],
    ['[AUTOTITLE"을]', '[AUTOTITLE]'],
    ['["AUTOTITLE]', '"[AUTOTITLE]'],
    ['[AUTOTITLE"을 참조하세요.](', '[AUTOTITLE]('],
  ]
  let fixed = content
  for (const [broken, repaired] of autotitleRepairs) {
    fixed = fixed.replaceAll(broken, repaired)
  }

  // Liquid tags often lose the line break that followed their `}`, which
  // breaks formatting, particularly around Markdown tables. English:
  //
  //   {%- ifversion ghes %}
  //   | Thing | ✔️ |
  //   {%- endif %}
  //
  // Translation:
  //
  //   {%- ifversion ghes %} | Thing | ✔️ | {%- endif %}
  //
  // Each `{% ... %} ` in the translation is turned into `{% ... %}\n` when
  // the English contains the newline form and does not contain the space
  // form. The tag could be matched against an unrelated occurrence in the
  // English, but the worst outcome is a sentence ending up in its own
  // paragraph, which is an acceptable risk.
  fixed = fixed.replace(/\{%(.+?)%\} /g, (tag) => {
    // The non-greedy regex can still span several tags, for example
    // `{% bla bla %}, and {% foo bar %} `, because that is the first run
    // that starts with `{%` and ends with `%} `. Those are left alone.
    const spansSeveralTags = tag.lastIndexOf('{%') > 0
    if (spansSeveralTags) return tag

    const tagWithNewline = tag.slice(0, -1) + '\n'
    const englishHasNewlineOnly =
      englishContent.includes(tagWithNewline) && !englishContent.includes(tag)
    return englishHasNewlineOnly ? tagWithNewline : tag
  })

  // The correction above depends on `{% foo %}\n` being in the English and
  // `{% foo %} ` not being there. `{% endif %}` is so common that this
  // rarely holds for it, yet this shape shows up a lot:
  //
  //   ... {% endif %} | First Column ...
  //
  // and has to become:
  //
  //   ... {% endif %}
  //   | First Column ...
  //
  // So the longer `{% endif %}\n| ` is looked up in the English instead.
  fixed = fixed.replace(/\{% endif %\} \| /g, (found) => {
    const endifThenRow = '{% endif %}\n| '
    return englishContent.includes(endifThenRow) ? endifThenRow : found
  })

  // Whole tables are frequently collapsed onto one very long line, e.g.
  //
  //   | Qualifizierer | Beschreibung | | -------- | -------- | {% ifversion ... %} | `advanced-security:enabled` | ... | {% endif %}
  //
  // An empty cell between two pipes marks where a row break used to be.
  return fixed.replaceAll(' | | ', ' |\n| ')
}
/**
 * Build the siteTree: a nested object with pages for every language and
 * version. It carries parent, child and sibling relationships, which makes
 * it suitable for navigation:
 *
 *   siteTree[languageCode][version].childPages[...].childPages[...] (etc.)
 *
 * Starting from the unversioned tree of each language, every version gets
 * its own copy in which (1) each item has an `href` for that version and
 * (2) child pages not available in that version are dropped.
 *
 * The order of languages and versions is irrelevant, but the order inside
 * each `childPages` array is preserved because navigation depends on it.
 *
 * @param {object} [unversionedTree] - Result of `loadUnversionedTree()`;
 *   loaded on demand when falsy.
 * @returns {Promise<object>} `{ [langCode]: { [version]: treeNode } }`
 */
export async function loadSiteTree(unversionedTree) {
  const sourceTree = Object.assign({}, unversionedTree || (await loadUnversionedTree()))

  // Version one language's tree for every known version, concurrently.
  const buildLanguage = async (langCode) => {
    const versionEntries = await Promise.all(
      versions.map(async (version) => {
        // Each version works on its own shallow copy of the language root.
        const rootCopy = Object.assign({}, sourceTree[langCode])
        return [version, await versionPages(rootCopy, version, langCode)]
      })
    )
    return [langCode, Object.fromEntries(versionEntries)]
  }

  const languageEntries = await Promise.all(Object.keys(languages).map(buildLanguage))
  return Object.fromEntries(languageEntries)
}
/**
 * Turn an unversioned tree node into its form for a single version.
 *
 * Sets `obj.href` to the page's permalink for `version` and, recursively,
 * replaces `obj.childPages` with versioned shallow copies of only those
 * children that apply to `version`. `obj` itself is modified in place, so
 * callers pass in a copy.
 *
 * @param {object} obj - Tree node with `page` and optionally `childPages`.
 * @param {string} version - Version to build the node for.
 * @param {string} langCode - Only handed on to the recursive calls.
 * @returns {Promise<object>} The same `obj`, now versioned.
 */
export async function versionPages(obj, version, langCode) {
  // The homepage permalink stands in for the non-enterprise default version.
  const isForVersion = ({ pageVersion }) =>
    pageVersion === version ||
    (pageVersion === 'homepage' && version === nonEnterpriseDefaultVersion)

  // A versioned href is a convenience for layouts.
  const permalink = obj.page.permalinks.find(isForVersion)
  obj.href = permalink.href

  if (!obj.childPages) return obj

  const appliesToVersion = (child) => child.page.applicableVersions.includes(version)
  const versionChild = (child) => versionPages(Object.assign({}, child), version, langCode)

  obj.childPages = await Promise.all(obj.childPages.filter(appliesToVersion).map(versionChild))
  return obj
}
/**
 * Flatten the unversioned tree into one array of Page objects.
 *
 * Languages are visited in the order given by `languagesOnly` (or the keys
 * of `languages`), and each language's tree is walked depth-first with a
 * page listed before its children.
 *
 * @param {object} [unversionedTree] - Result of `loadUnversionedTree()`;
 *   loaded on demand when falsy.
 * @param {string[]|null} [languagesOnly=null] - Language codes to include;
 *   a falsy value includes every language.
 * @returns {Promise<object[]>} The collected pages.
 * @throws {Error} When `languagesOnly` is truthy but not an array.
 */
export async function loadPageList(unversionedTree, languagesOnly = null) {
  if (languagesOnly && !Array.isArray(languagesOnly)) {
    throw new Error("'languagesOnly' has to be an array")
  }

  const tree = unversionedTree || (await loadUnversionedTree(languagesOnly))
  const langCodes = languagesOnly || Object.keys(languages)
  const pages = []

  // A node without a page contributes nothing, and neither do its children.
  const collect = (node) => {
    if (!node.page) return
    pages.push(node.page)
    if (!node.childPages) return
    node.childPages.forEach((child) => collect(child))
  }

  langCodes.forEach((langCode) => collect(tree[langCode]))
  return pages
}

// Alias of `loadPageList`.
export const loadPages = loadPageList
/**
 * Index pages by permalink for fast lookup.
 *
 * @param {object[]} pageList - Pages, each with a `permalinks` collection
 *   whose items have an `href`.
 * @returns {object} Object mapping every permalink href to its page. If two
 *   pages share an href, the later page in `pageList` wins.
 */
export function createMapFromArray(pageList) {
  const pagesByHref = {}
  pageList.forEach((page) => {
    for (const { href } of page.permalinks) {
      pagesByHref[href] = page
    }
  })
  return pagesByHref
}
/**
 * Build the permalink-to-page lookup object.
 *
 * @param {object[]} [pageList] - Pages to index. When falsy, the list is
 *   loaded with `loadPageList()`.
 * @param {string[]|null} [languagesOnly=null] - Language codes handed to
 *   `loadPageList()` when `pageList` is not supplied; ignored otherwise.
 * @returns {Promise<object>} Object mapping each permalink href to its page.
 */
export async function loadPageMap(pageList, languagesOnly = null) {
  // `loadPageList(unversionedTree, languagesOnly)` takes the tree first, so
  // `languagesOnly` has to go in the second position. Passing it first made
  // `loadPageList` use the array of language codes as if it were the tree.
  const pages = pageList || (await loadPageList(undefined, languagesOnly))
  return createMapFromArray(pages)
}
// Default export bundling the loader functions. `loadPages` is the same
// function as `loadPageList`, exposed under a second name.
export default {
loadUnversionedTree,
loadSiteTree,
loadPages: loadPageList,
loadPageMap,
}