only load site tree from English and "translate" the others (#32761)
This commit is contained in:
@@ -1,34 +1,10 @@
|
|||||||
import path from 'path'
|
import path from 'path'
|
||||||
import fs from 'fs/promises'
|
import fs from 'fs/promises'
|
||||||
|
|
||||||
import languages from './languages.js'
|
import Page from './page.js'
|
||||||
import Page, { FrontmatterErrorsError } from './page.js'
|
|
||||||
|
|
||||||
// If you run `export DEBUG_TRANSLATION_FALLBACKS=true` in your terminal,
|
export default async function createTree(originalPath, rootPath) {
|
||||||
// every time a translation file fails to initialize we fall back to English
|
const basePath = rootPath || originalPath
|
||||||
// and write a warning to stdout.
|
|
||||||
const DEBUG_TRANSLATION_FALLBACKS = Boolean(
|
|
||||||
JSON.parse(process.env.DEBUG_TRANSLATION_FALLBACKS || 'false')
|
|
||||||
)
|
|
||||||
// If you don't want to fall back to English automatically on corrupt
|
|
||||||
// translation files, set `export THROW_TRANSLATION_ERRORS=true`
|
|
||||||
const THROW_TRANSLATION_ERRORS = Boolean(
|
|
||||||
JSON.parse(process.env.THROW_TRANSLATION_ERRORS || 'false')
|
|
||||||
)
|
|
||||||
|
|
||||||
// Module level cache
|
|
||||||
const _basePaths = new Map()
|
|
||||||
export function getBasePath(directory) {
|
|
||||||
if (!_basePaths.has(directory)) {
|
|
||||||
_basePaths.set(directory, path.posix.join(directory, 'content'))
|
|
||||||
}
|
|
||||||
return _basePaths.get(directory)
|
|
||||||
}
|
|
||||||
|
|
||||||
export default async function createTree(originalPath, langObj) {
|
|
||||||
// This basePath definition is needed both here and in lib/page-data.js because this
|
|
||||||
// function runs recursively, and the value for originalPath changes on recursive runs.
|
|
||||||
const basePath = getBasePath(langObj.dir)
|
|
||||||
|
|
||||||
// On recursive runs, this is processing page.children items in `/<link>` format.
|
// On recursive runs, this is processing page.children items in `/<link>` format.
|
||||||
// If the path exists as is, assume this is a directory with a child index.md.
|
// If the path exists as is, assume this is a directory with a child index.md.
|
||||||
@@ -44,54 +20,22 @@ export default async function createTree(originalPath, langObj) {
|
|||||||
const relativePath = filepath.replace(`${basePath}/`, '')
|
const relativePath = filepath.replace(`${basePath}/`, '')
|
||||||
|
|
||||||
// Initialize the Page! This is where the file reads happen.
|
// Initialize the Page! This is where the file reads happen.
|
||||||
let page
|
const page = await Page.init({
|
||||||
try {
|
basePath,
|
||||||
page = await Page.init({
|
relativePath,
|
||||||
basePath,
|
languageCode: 'en',
|
||||||
relativePath,
|
})
|
||||||
languageCode: langObj.code,
|
|
||||||
})
|
|
||||||
} catch (err) {
|
|
||||||
if (
|
|
||||||
!THROW_TRANSLATION_ERRORS &&
|
|
||||||
err instanceof FrontmatterErrorsError &&
|
|
||||||
langObj.code !== 'en'
|
|
||||||
) {
|
|
||||||
// Something corrupt in the `.md` file caused it to throw an
|
|
||||||
// error from reading it in. Let's "gracefully" recover by
|
|
||||||
// swapping this one out for the English content and pretend it
|
|
||||||
// exists in this other language.
|
|
||||||
const englishBasePath = getBasePath(languages.en.dir)
|
|
||||||
page = await Page.init({
|
|
||||||
basePath: englishBasePath,
|
|
||||||
relativePath,
|
|
||||||
languageCode: langObj.code,
|
|
||||||
})
|
|
||||||
if (DEBUG_TRANSLATION_FALLBACKS) {
|
|
||||||
console.warn(
|
|
||||||
`Unable to initialized ${path.join(basePath, relativePath)} due to frontmatter errors. ` +
|
|
||||||
`Will proceed with using ${path.join(englishBasePath, relativePath)} instead.`
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
throw err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!page) {
|
if (!page) {
|
||||||
// Do not throw an error if Early Access is not available.
|
// Do not throw an error if Early Access is not available.
|
||||||
if (relativePath.startsWith('early-access')) {
|
if (relativePath.startsWith('early-access')) {
|
||||||
if (langObj.code === 'en') {
|
console.warn(
|
||||||
console.warn(
|
`${relativePath} could not be turned into a Page, but is ignore because it's early-access`
|
||||||
`${relativePath} could not be turned into a Page, but is ignore because it's early-access`
|
)
|
||||||
)
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Do not throw an error if translated page is not available.
|
|
||||||
if (langObj.code !== 'en') return
|
|
||||||
|
|
||||||
throw Error(`Cannot initialize page for ${filepath} in ${langObj.code}`)
|
throw Error(`Cannot initialize page for ${filepath}`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the root tree object on the first run, and create children recursively.
|
// Create the root tree object on the first run, and create children recursively.
|
||||||
@@ -105,7 +49,7 @@ export default async function createTree(originalPath, langObj) {
|
|||||||
item.childPages = (
|
item.childPages = (
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
item.page.children.map(
|
item.page.children.map(
|
||||||
async (child) => await createTree(path.posix.join(originalPath, child), langObj)
|
async (child) => await createTree(path.posix.join(originalPath, child), basePath)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
).filter(Boolean)
|
).filter(Boolean)
|
||||||
|
|||||||
256
lib/page-data.js
256
lib/page-data.js
@@ -2,23 +2,33 @@ import path from 'path'
|
|||||||
|
|
||||||
import languages from './languages.js'
|
import languages from './languages.js'
|
||||||
import { allVersions } from './all-versions.js'
|
import { allVersions } from './all-versions.js'
|
||||||
import createTree, { getBasePath } from './create-tree.js'
|
import createTree from './create-tree.js'
|
||||||
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
|
import nonEnterpriseDefaultVersion from './non-enterprise-default-version.js'
|
||||||
|
import readFileContents from './read-file-contents.js'
|
||||||
import Page from './page.js'
|
import Page from './page.js'
|
||||||
|
import frontmatterSchema from './frontmatter.js'
|
||||||
|
|
||||||
|
// If you run `export DEBUG_TRANSLATION_FALLBACKS=true` in your terminal,
|
||||||
|
// every time a translation file fails to initialize we fall back to English
|
||||||
|
// and write a warning to stdout.
|
||||||
|
const DEBUG_TRANSLATION_FALLBACKS = Boolean(
|
||||||
|
JSON.parse(process.env.DEBUG_TRANSLATION_FALLBACKS || 'false')
|
||||||
|
)
|
||||||
|
// If you don't want to fall back to English automatically on corrupt
|
||||||
|
// translation files, set `export THROW_TRANSLATION_ERRORS=true`
|
||||||
|
const THROW_TRANSLATION_ERRORS = Boolean(
|
||||||
|
JSON.parse(process.env.THROW_TRANSLATION_ERRORS || 'false')
|
||||||
|
)
|
||||||
|
|
||||||
const versions = Object.keys(allVersions)
|
const versions = Object.keys(allVersions)
|
||||||
|
|
||||||
// These are the exceptions to the rule.
|
class FrontmatterParsingError extends Error {}
|
||||||
// If a URI starts with one of these prefixes, it basically means we don't
|
|
||||||
// bother to "backfill" a translation in its spot.
|
// Note! As of Nov 2022, the schema says that 'product' is translatable
|
||||||
// For example, `/en/github/site-policy-deprecated/foo` works
|
// which is surprising since only a single page has prose in it.
|
||||||
// only in English and we don't bother making `/ja/github/site-policy-deprecated/foo`
|
const translatableFrontmatterKeys = Object.entries(frontmatterSchema.schema.properties)
|
||||||
// work too.
|
.filter(([, value]) => value.translatable)
|
||||||
const TRANSLATION_DRIFT_EXCEPTIONS = [
|
.map(([key]) => key)
|
||||||
'github/site-policy-deprecated',
|
|
||||||
// Early access stuff never has translations.
|
|
||||||
'early-access',
|
|
||||||
]
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We only need to initialize pages _once per language_ since pages don't change per version. So we do that
|
* We only need to initialize pages _once per language_ since pages don't change per version. So we do that
|
||||||
@@ -30,6 +40,7 @@ export async function loadUnversionedTree(languagesOnly = null) {
|
|||||||
throw new Error("'languagesOnly' has to be an array")
|
throw new Error("'languagesOnly' has to be an array")
|
||||||
}
|
}
|
||||||
const unversionedTree = {}
|
const unversionedTree = {}
|
||||||
|
unversionedTree.en = await createTree(path.join(languages.en.dir, 'content'))
|
||||||
|
|
||||||
const languagesValues = Object.entries(languages)
|
const languagesValues = Object.entries(languages)
|
||||||
.filter(([language]) => {
|
.filter(([language]) => {
|
||||||
@@ -38,16 +49,135 @@ export async function loadUnversionedTree(languagesOnly = null) {
|
|||||||
.map(([, data]) => {
|
.map(([, data]) => {
|
||||||
return data
|
return data
|
||||||
})
|
})
|
||||||
|
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
languagesValues.map(async (langObj) => {
|
languagesValues
|
||||||
const localizedContentPath = path.posix.join(langObj.dir, 'content')
|
.filter((langObj) => langObj.code !== 'en')
|
||||||
unversionedTree[langObj.code] = await createTree(localizedContentPath, langObj)
|
.map(async (langObj) => {
|
||||||
})
|
const localizedContentPath = path.join(langObj.dir, 'content')
|
||||||
|
unversionedTree[langObj.code] = await translateTree(
|
||||||
|
localizedContentPath,
|
||||||
|
langObj,
|
||||||
|
unversionedTree.en
|
||||||
|
)
|
||||||
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
return unversionedTree
|
return unversionedTree
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function translateTree(dir, langObj, enTree) {
|
||||||
|
const item = {}
|
||||||
|
const enPage = enTree.page
|
||||||
|
const { ...enData } = enPage
|
||||||
|
|
||||||
|
const basePath = dir
|
||||||
|
const relativePath = enPage.relativePath
|
||||||
|
const fullPath = path.join(basePath, relativePath)
|
||||||
|
|
||||||
|
let data
|
||||||
|
let content
|
||||||
|
try {
|
||||||
|
const read = await readFileContents(fullPath)
|
||||||
|
// If it worked, great!
|
||||||
|
content = read.content
|
||||||
|
data = read.data
|
||||||
|
|
||||||
|
if (!data) {
|
||||||
|
// If the file's frontmatter Yaml is entirely broken,
|
||||||
|
// the result of `readFileContents()` is that you just
|
||||||
|
// get an `errors` key. E.g.
|
||||||
|
//
|
||||||
|
// errors: [
|
||||||
|
// {
|
||||||
|
// reason: 'invalid frontmatter entry',
|
||||||
|
// message: 'YML parsing error!',
|
||||||
|
// filepath: 'translations/ja-JP/content/get-started/index.md'
|
||||||
|
// }
|
||||||
|
// ]
|
||||||
|
//
|
||||||
|
// If this is the case, throw an error so we can lump it in with
|
||||||
|
// how we deal with the file not even being present on disk.
|
||||||
|
throw new FrontmatterParsingError(read.errors)
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const { property } of read.errors) {
|
||||||
|
// If any of the errors happened on keys that are considered
|
||||||
|
// translatable, we can't accept that and have to fall back to
|
||||||
|
// English.
|
||||||
|
// For example, if a Japanese page's frontmatter lacks `title`,
|
||||||
|
// (which triggers a 'is required' error) you can't include it
|
||||||
|
// because you'd have a Page with `{title: undefined}`.
|
||||||
|
// The beauty in this is that if the translated content file
|
||||||
|
// has something wrong with, say, the `versions` frontmatter key
|
||||||
|
// we don't even care because we won't be using it anyway.
|
||||||
|
if (translatableFrontmatterKeys.includes(property)) {
|
||||||
|
const msg = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English`
|
||||||
|
if (DEBUG_TRANSLATION_FALLBACKS) {
|
||||||
|
console.warn(msg)
|
||||||
|
}
|
||||||
|
if (THROW_TRANSLATION_ERRORS) {
|
||||||
|
throw new Error(msg)
|
||||||
|
}
|
||||||
|
data[property] = enData[property]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// If it didn't work because it didn't exist, don't fret,
|
||||||
|
// we'll use the English equivalent's data and content.
|
||||||
|
if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) {
|
||||||
|
data = enData
|
||||||
|
content = enPage.markdown
|
||||||
|
const msg = `Unable to initialized ${fullPath} because translation content file does not exist.`
|
||||||
|
if (DEBUG_TRANSLATION_FALLBACKS) {
|
||||||
|
console.warn(msg)
|
||||||
|
}
|
||||||
|
if (THROW_TRANSLATION_ERRORS) {
|
||||||
|
throw new Error(msg)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const translatedData = Object.fromEntries(
|
||||||
|
translatableFrontmatterKeys.map((key) => {
|
||||||
|
return [key, data[key]]
|
||||||
|
})
|
||||||
|
)
|
||||||
|
// The "content" isn't a frontmatter key
|
||||||
|
translatedData.markdown = content
|
||||||
|
|
||||||
|
item.page = new Page(
|
||||||
|
Object.assign(
|
||||||
|
{},
|
||||||
|
// By default, shallow-copy everything from the English equivalent.
|
||||||
|
enData,
|
||||||
|
// Overlay with the translation's core properties.
|
||||||
|
{
|
||||||
|
basePath,
|
||||||
|
relativePath,
|
||||||
|
languageCode: langObj.code,
|
||||||
|
fullPath,
|
||||||
|
},
|
||||||
|
// And the translation's translated properties.
|
||||||
|
translatedData
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if (item.page.children) {
|
||||||
|
item.childPages = await Promise.all(
|
||||||
|
enTree.childPages
|
||||||
|
.filter((childTree) => {
|
||||||
|
// Translations should not get early access pages at all.
|
||||||
|
return childTree.page.relativePath.split(path.sep)[0] !== 'early-access'
|
||||||
|
})
|
||||||
|
.map((childTree) => translateTree(dir, langObj, childTree))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The siteTree is a nested object with pages for every language and version, useful for nav because it
|
* The siteTree is a nested object with pages for every language and version, useful for nav because it
|
||||||
* contains parent, child, and sibling relationships:
|
* contains parent, child, and sibling relationships:
|
||||||
@@ -151,103 +281,11 @@ export function createMapFromArray(pageList) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export async function loadPageMap(pageList) {
|
export async function loadPageMap(pageList) {
|
||||||
const pages = await correctTranslationOrphans(pageList || (await loadPageList()))
|
const pages = pageList || (await loadPageList())
|
||||||
const pageMap = createMapFromArray(pages)
|
const pageMap = createMapFromArray(pages)
|
||||||
return pageMap
|
return pageMap
|
||||||
}
|
}
|
||||||
|
|
||||||
// If a translation page exists, that doesn't have an English equivalent,
|
|
||||||
// remove it.
|
|
||||||
// If an English page exists, that doesn't have a translation equivalent,
|
|
||||||
// add it.
|
|
||||||
// Note, this function is exported purely for the benefit of the unit tests.
|
|
||||||
export async function correctTranslationOrphans(pageList, basePath = null) {
|
|
||||||
const englishRelativePaths = new Set()
|
|
||||||
for (const page of pageList) {
|
|
||||||
if (page.languageCode === 'en') {
|
|
||||||
englishRelativePaths.add(page.relativePath)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prime the Map with an empty set for each language prefix.
|
|
||||||
// It's important that we do this for *every* language rather than
|
|
||||||
// just populating `nonEnglish` based on those pages that *are* present.
|
|
||||||
// Otherwise, we won't have an index of all the languages
|
|
||||||
// that *might* be missing.
|
|
||||||
const nonEnglish = new Map()
|
|
||||||
Object.keys(languages)
|
|
||||||
.filter((lang) => lang !== 'en')
|
|
||||||
.forEach((languageCode) => {
|
|
||||||
nonEnglish.set(languageCode, new Set())
|
|
||||||
})
|
|
||||||
|
|
||||||
// By default, when backfilling, we set the `basePath` to be that of
|
|
||||||
// English. But for the benefit of being able to do unit tests,
|
|
||||||
// we make this an optional argument. Then, unit tests can use
|
|
||||||
// its "tests/fixtures" directory.
|
|
||||||
const englishBasePath = basePath || getBasePath(languages.en.dir)
|
|
||||||
|
|
||||||
// Filter out all non-English pages that appear to be excess.
|
|
||||||
// E.g. if an English doc was renamed from `content/foo.md` to
|
|
||||||
// `content/bar.md` what will happen is that `TRANSLATIONS_ROOT/*/content/foo.md`
|
|
||||||
// will still linger around and we want to remove that even if it was
|
|
||||||
// scooped up from disk.
|
|
||||||
const newPageList = []
|
|
||||||
for (const page of pageList) {
|
|
||||||
if (page.languageCode === 'en') {
|
|
||||||
// English pages are never considered "excess"
|
|
||||||
newPageList.push(page)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// If this translation page exists in English, keep it but also
|
|
||||||
// add it to the set of relative paths that is known.
|
|
||||||
if (englishRelativePaths.has(page.relativePath)) {
|
|
||||||
nonEnglish.get(page.languageCode).add(page.relativePath)
|
|
||||||
newPageList.push(page)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const pageLoadPromises = []
|
|
||||||
for (const relativePath of englishRelativePaths) {
|
|
||||||
for (const [languageCode, relativePaths] of nonEnglish) {
|
|
||||||
if (!relativePaths.has(relativePath)) {
|
|
||||||
// At this point, we've found an English `relativePath` that is
|
|
||||||
// not used by this language.
|
|
||||||
// But before we decide to "backfill" it from the English equivalent
|
|
||||||
// we first need to figure out if it should be excluded.
|
|
||||||
// The reason for doing this check this late is for the benefit
|
|
||||||
// of optimization. In general, when the translation pipeline has
|
|
||||||
// done its magic, this should be very rare, so it's unnecessary
|
|
||||||
// to do this exception check on every single English relativePath.
|
|
||||||
if (TRANSLATION_DRIFT_EXCEPTIONS.find((exception) => relativePath.startsWith(exception))) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// The magic right here!
|
|
||||||
// The trick is that we can't clone instances of class Page. We need
|
|
||||||
// to create them for this language. But the trick is that we
|
|
||||||
// use the English relative path so it can have something to read.
|
|
||||||
// For example, if we have figured out that
|
|
||||||
// `TRANSLATIONS_ROOT/ja-JP/content/foo.md` doesn't exist, we pretend
|
|
||||||
// that we can use `foo.md` and the base path of `content/`.
|
|
||||||
pageLoadPromises.push(
|
|
||||||
Page.init({
|
|
||||||
basePath: englishBasePath,
|
|
||||||
relativePath,
|
|
||||||
languageCode,
|
|
||||||
})
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const additionalPages = await Promise.all(pageLoadPromises)
|
|
||||||
newPageList.push(...additionalPages)
|
|
||||||
|
|
||||||
return newPageList
|
|
||||||
}
|
|
||||||
|
|
||||||
export default {
|
export default {
|
||||||
loadUnversionedTree,
|
loadUnversionedTree,
|
||||||
loadSiteTree,
|
loadSiteTree,
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ class Page {
|
|||||||
}
|
}
|
||||||
|
|
||||||
constructor(opts) {
|
constructor(opts) {
|
||||||
if (opts.frontmatterErrors.length) {
|
if (opts.frontmatterErrors && opts.frontmatterErrors.length) {
|
||||||
throw new FrontmatterErrorsError(
|
throw new FrontmatterErrorsError(
|
||||||
`${opts.frontmatterErrors.length} frontmatter errors trying to load ${opts.fullPath}`,
|
`${opts.frontmatterErrors.length} frontmatter errors trying to load ${opts.fullPath}`,
|
||||||
opts.frontmatterErrors
|
opts.frontmatterErrors
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import { jest } from '@jest/globals'
|
import { jest } from '@jest/globals'
|
||||||
import path from 'path'
|
import path from 'path'
|
||||||
import { loadPages, correctTranslationOrphans } from '../../lib/page-data.js'
|
import { loadPages } from '../../lib/page-data.js'
|
||||||
import libLanguages from '../../lib/languages.js'
|
import libLanguages from '../../lib/languages.js'
|
||||||
import { liquid } from '../../lib/render-content/index.js'
|
import { liquid } from '../../lib/render-content/index.js'
|
||||||
import patterns from '../../lib/patterns.js'
|
import patterns from '../../lib/patterns.js'
|
||||||
@@ -18,7 +18,7 @@ describe('pages module', () => {
|
|||||||
let pages
|
let pages
|
||||||
|
|
||||||
beforeAll(async () => {
|
beforeAll(async () => {
|
||||||
pages = await correctTranslationOrphans(await loadPages())
|
pages = await loadPages()
|
||||||
})
|
})
|
||||||
|
|
||||||
describe('loadPages', () => {
|
describe('loadPages', () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user