1
0
mirror of synced 2025-12-23 11:54:18 -05:00

Fix translation reporting (round 1!) (#33754)

This commit is contained in:
Kevin Heis
2023-01-05 12:16:38 -08:00
committed by GitHub
parent 0eb3849eaf
commit 928b0dab02
5 changed files with 109 additions and 143 deletions

View File

@@ -117,7 +117,8 @@ if (process.env.ENABLED_LANGUAGES) {
Object.keys(languages).forEach((code) => { Object.keys(languages).forEach((code) => {
if (!process.env.ENABLED_LANGUAGES.includes(code)) delete languages[code] if (!process.env.ENABLED_LANGUAGES.includes(code)) delete languages[code]
}) })
console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`) // This makes the translation health report not valid JSON
// console.log(`ENABLED_LANGUAGES: ${process.env.ENABLED_LANGUAGES}`)
} }
} else if (process.env.NODE_ENV === 'test') { } else if (process.env.NODE_ENV === 'test') {
// Unless explicitly set, when running tests default to just English // Unless explicitly set, when running tests default to just English

View File

@@ -112,12 +112,13 @@ async function translateTree(dir, langObj, enTree) {
// has something wrong with, say, the `versions` frontmatter key // has something wrong with, say, the `versions` frontmatter key
// we don't even care because we won't be using it anyway. // we don't even care because we won't be using it anyway.
if (translatableFrontmatterKeys.includes(property)) { if (translatableFrontmatterKeys.includes(property)) {
const msg = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English` const message = `frontmatter error on '${property}' (in ${fullPath}) so falling back to English`
if (DEBUG_TRANSLATION_FALLBACKS) { if (DEBUG_TRANSLATION_FALLBACKS) {
console.warn(msg) // The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
} }
if (THROW_TRANSLATION_ERRORS) { if (THROW_TRANSLATION_ERRORS) {
throw new Error(msg) throw new Error(message)
} }
data[property] = enData[property] data[property] = enData[property]
} }
@@ -128,12 +129,13 @@ async function translateTree(dir, langObj, enTree) {
if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) { if (error.code === 'ENOENT' || error instanceof FrontmatterParsingError) {
data = enData data = enData
content = enPage.markdown content = enPage.markdown
const msg = `Unable to initialized ${fullPath} because translation content file does not exist.` const message = `Unable to initialize ${fullPath} because translation content file does not exist.`
if (DEBUG_TRANSLATION_FALLBACKS) { if (DEBUG_TRANSLATION_FALLBACKS) {
console.warn(msg) // The object format is so the health report knows which path the issue is on
console.warn({ message, path: relativePath })
} }
if (THROW_TRANSLATION_ERRORS) { if (THROW_TRANSLATION_ERRORS) {
throw new Error(msg) throw new Error(message)
} }
} else { } else {
throw error throw error

View File

@@ -12,7 +12,7 @@ const EXCEPTIONS_FILE = path.join(__dirname, './static/redirect-exceptions.txt')
// This function runs at server warmup and precompiles possible redirect routes. // This function runs at server warmup and precompiles possible redirect routes.
// It outputs them in key-value pairs within a neat Javascript object: { oldPath: newPath } // It outputs them in key-value pairs within a neat Javascript object: { oldPath: newPath }
async function precompileRedirects(pageList) { export async function precompileRedirects(pageList) {
const allRedirects = readCompressedJsonFileFallback('./lib/redirects/static/developer.json') const allRedirects = readCompressedJsonFileFallback('./lib/redirects/static/developer.json')
const externalRedirects = readCompressedJsonFileFallback('./lib/redirects/external-sites.json') const externalRedirects = readCompressedJsonFileFallback('./lib/redirects/external-sites.json')

View File

@@ -49,9 +49,7 @@ function getNewSrc(node) {
} catch (err) { } catch (err) {
console.warn( console.warn(
`Failed to get a hash for ${src} ` + `Failed to get a hash for ${src} ` +
'(This is mostly harmless and can happen with outdated translations). ' + '(This is mostly harmless and can happen with outdated translations).'
'Full error output:',
err
) )
} }
} }

View File

@@ -9,155 +9,120 @@
/* Nota bene: /* Nota bene:
If you are getting more errors all of a sudden, try running this: If you are getting more errors all of a sudden, try running this:
$ script/i18n/create-translation-health-report.js -l en -r 000 $ script/i18n/create-translation-health-report.js -l en -r 000
If there are any errors, const context = { ... } probably needs more data. If there are any errors before getting the JSON output,
const context = { ... } probably needs more data.
*/ */
import { program } from 'commander' import { program } from 'commander'
import fs from 'fs/promises' import fs from 'fs/promises'
import { pick } from 'lodash-es'
import { loadPages, loadPageMap } from '../../lib/page-data.js'
import loadSiteData from '../../lib/site-data.js'
import loadRedirects from '../../lib/redirects/precompile.js'
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
import { languageKeys } from '../../lib/languages.js'
import { getProductStringFromPath } from '../../lib/path-utils.js'
program program
.description('Create a translation health report for one language.') .description('Create a translation health report for one language.')
.requiredOption('-l, --language <language>', 'The language to health check') .requiredOption('-l, --language <language>', 'The language to health check')
.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA') .option('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
.parse(process.argv) .parse(process.argv)
// Warn (via console.warn) about each fallback to English instead of throwing
process.env.DEBUG_TRANSLATION_FALLBACKS = true
// The THROW_TRANSLATION_ERRORS option stops everything, but we want it to continue to generate the full report
process.env.ENABLED_LANGUAGES = `en,${program.opts().language}`
// In debug mode, it will call console.warn ... so overriding :)
// Want to make sure the result is valid JSON
const prevConsoleWarn = console.warn
const prevConsoleError = console.error
let issues = []
console.warn = console.error = (...args) => {
if (args.length > 1) {
issues.push({ message: args.map(String).join(' '), score: 0 })
} else if (typeof args[0] === 'string') {
issues.push({ message: args[0], score: 0 })
} else if (args[0]?.constructor === Object) {
const path = args[0].path?.replace('/index.md', '').replace('.md', '')
issues.push({ path, message: args[0].message, score: scores[path] || 0 })
}
}
// Weird import syntax, but forces it to load after process.env... changes
const { languageKeys } = await import('../../lib/languages.js')
const { loadPages, loadPageMap } = await import('../../lib/page-data.js')
const { precompileRedirects } = await import('../../lib/redirects/precompile.js')
const { allVersions, allVersionKeys } = await import('../../lib/all-versions.js')
const { getProductStringFromPath } = await import('../../lib/path-utils.js')
// Check that the language is valid
const { language, gitref } = program.opts()
if (!languageKeys.includes(language)) {
throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
}
// Gather popularity data the search uses to prioritize errors // Gather popularity data the search uses to prioritize errors
async function fetchPopularityData() { const scores = {}
const output = {} const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8')
const popularPagesRaw = await fs.readFile('lib/search/popular-pages.json', 'utf8') for (const line of popularPagesRaw.split('\n')) {
for (const line of popularPagesRaw.split('\n')) { try {
try { const row = JSON.parse(line)
const row = JSON.parse(line) scores[row.path_article] = row.path_count
output[row.path_article] = row.path_count } catch {}
} catch {}
}
return output
} }
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) { // Load all pages in language
const allPages = await loadPages()
const pages = allPages.filter((page) => page.languageCode === language)
const pageMap = await loadPageMap(pages)
const redirects = await precompileRedirects(pages)
// Try to render each page
for (const page of pages) {
const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
// Go through each version... // Go through each version...
const promises = allVersionKeys const versions = allVersionKeys.filter((version) => page.applicableVersions.includes(version))
.filter((version) => page.applicableVersions.includes(version)) const pageIssues = {}
.map(async (version) => { for (const version of versions) {
// Collect if errors const path = `/${language}/${version}/${plainPath}`
const pageVersionErrors = [] // Reference middleware/context.js for shape
try { const context = {
const path = `/${language}/${version}/${plainPath}` currentVersion: version, // needed for all pages
// Reference middleware/context.js for data shape currentLanguage: language, // needed for all pages
const context = { currentPath: path, // needed for all pages
...data, // needed for all pages currentVersionObj: allVersions[version], // needed for ifversion tag
currentVersion: version, // needed for all pages currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages
currentLanguage: language, // needed for all pages pages: pageMap, // needed for learning-track on guides pages
currentPath: path, // needed for all pages redirects, // needed for learning-track on guides pages
currentVersionObj: allVersions[version], // needed for ifversion tag }
currentProduct: getProductStringFromPath(path), // needed for learning-track on guides pages try {
pages: pageMap, // needed for learning-track on guides pages await page.render(context)
redirects, // needed for learning-track on guides pages } catch (err) {
} // Which messages apply to which versions
await page.render(context, pageVersionErrors) pageIssues[err.message] = pageIssues[err.message] || []
} catch (err) { pageIssues[err.message].push(version)
pageVersionErrors.push(err) }
} }
if (pageVersionErrors.length) { if (Object.keys(pageIssues).length) {
return [ issues.push({
version, path: plainPath,
// Filter down properties to make it easier for messages: pageIssues,
// translators to get the clearest information on the error score: scores[plainPath] || 0,
pageVersionErrors.map((err) => pick(err, ['name', 'message', 'token.content'])),
]
// Other fields: Object.getOwnPropertyNames(err)
}
}) })
const arr = (await Promise.all(promises)).filter(Boolean)
if (arr.length) {
return Object.fromEntries(arr)
} }
} }
function groupErrors(errors) { // Sort by score desc so the translators know what to focus on first
return errors // Issues with more information should be higher
.map((page) => Object.values(page.versions).flat()) issues = issues
.flat() .filter((issue) => !issue.message?.includes('early-access'))
.map((version) => version.message) .sort((a, b) => b.score - a.score || JSON.stringify(b).length - JSON.stringify(a).length)
.reduce((sum, val) => {
sum[val] = sum[val] || 0 // Begin an output report
sum[val]++ const report = {
return sum language,
}, {}) gitref,
datetime: new Date().toJSON(),
issuesCount: issues.length,
issues,
} }
async function createReport() { console.warn = prevConsoleWarn
// Check that the language is valid console.error = prevConsoleError
const { language, gitref } = program.opts() console.log(JSON.stringify(report, null, 2))
if (!languageKeys.includes(language)) {
throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
}
// Load popularity data to sort errors
const popularity = await fetchPopularityData()
// Load all pages
const allPages = await loadPages()
const dataErrors = []
const data = loadSiteData(dataErrors)[language]
const pages = allPages
.filter((page) => page.languageCode === language)
// Early access pages log to the console, which would show in the report
.filter((page) => !page.relativePath.includes('early-access'))
const pageMap = await loadPageMap(pages)
const redirects = await loadRedirects(pages)
// Try to render each page
const pageErrors = (
await Promise.all(
pages.map(async (page) => {
const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
const errorsByVersion = await collectPageErrors(page, {
language,
data,
redirects,
plainPath,
pageMap,
})
if (errorsByVersion) {
return {
path: plainPath,
popularity: popularity[plainPath] || 0,
versions: errorsByVersion,
}
}
})
)
)
.filter(Boolean)
// Sort by popularity desc so the translators know what to focus on first
.sort((a, b) => b.popularity - a.popularity)
// Begin an output report
const report = {
language,
gitref,
datetime: new Date().toJSON(),
totalPages: pages.length,
totalErrorPages: pageErrors.length,
pageErrors,
// To group errors by message instead
groupedPageErrors: groupErrors(pageErrors),
// Filter down properties to make it easier for
// translators to get the clearest information on the error
dataErrors: dataErrors.map((err) => pick(err, ['name', 'message', 'token.content'])),
}
return report
}
console.warn = () => {} // shhh
console.log(JSON.stringify(await createReport(), null, 2))