43
.github/workflows/count-translation-corruptions.yml
vendored
Normal file
43
.github/workflows/count-translation-corruptions.yml
vendored
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
name: Count translation corruptions
|
||||||
|
|
||||||
|
# **What it does**: Generates a summary of Liquid corruptions per language.
|
||||||
|
# **Why we have it**: For insights into the state of translations and things we can do to fix them
|
||||||
|
# **Who does it impact**: Engineering
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- src/languages/scripts/count-translation-corruptions.ts
|
||||||
|
- .github/workflows/count-translation-corruptions.yml
|
||||||
|
- .github/actions/node-npm-setup/action.yml
|
||||||
|
- .github/actions/clone-translations/action.yml
|
||||||
|
- 'package**.json'
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
count-translation-corruptions:
|
||||||
|
if: github.repository == 'github/docs-internal'
|
||||||
|
runs-on: ubuntu-20.04-xl
|
||||||
|
steps:
|
||||||
|
- name: Checkout English repo
|
||||||
|
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
|
||||||
|
with:
|
||||||
|
# Using a PAT is necessary so that the new commit will trigger the
|
||||||
|
# CI in the PR. (Events from GITHUB_TOKEN don't trigger new workflows.)
|
||||||
|
token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
|
||||||
|
|
||||||
|
# Cloning the translations is important because translations are often a bit behind.
|
||||||
|
# So if a translation is a bit behind, it might still be referencing
|
||||||
|
# an asset even though none of the English content does.
|
||||||
|
- name: Clone all translations
|
||||||
|
uses: ./.github/actions/clone-translations
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
|
||||||
|
|
||||||
|
- uses: ./.github/actions/node-npm-setup
|
||||||
|
|
||||||
|
- name: Run count
|
||||||
|
run: npm run count-translation-corruptions
|
||||||
@@ -13,10 +13,6 @@ on:
|
|||||||
types:
|
types:
|
||||||
- labeled
|
- labeled
|
||||||
|
|
||||||
pull_request:
|
|
||||||
types:
|
|
||||||
- labeled
|
|
||||||
|
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@
|
|||||||
"check-content-type": "node src/workflows/check-content-type.js",
|
"check-content-type": "node src/workflows/check-content-type.js",
|
||||||
"check-github-github-links": "node src/links/scripts/check-github-github-links.js",
|
"check-github-github-links": "node src/links/scripts/check-github-github-links.js",
|
||||||
"copy-fixture-data": "node src/tests/scripts/copy-fixture-data.js",
|
"copy-fixture-data": "node src/tests/scripts/copy-fixture-data.js",
|
||||||
|
"count-translation-corruptions": "tsx src/languages/scripts/count-translation-corruptions.ts",
|
||||||
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon --inspect src/frame/server.js",
|
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon --inspect src/frame/server.js",
|
||||||
"delete-orphan-translation-files": "tsx src/workflows/delete-orphan-translation-files.ts",
|
"delete-orphan-translation-files": "tsx src/workflows/delete-orphan-translation-files.ts",
|
||||||
"dev": "cross-env npm start",
|
"dev": "cross-env npm start",
|
||||||
|
|||||||
@@ -90,6 +90,12 @@ type Query = {
|
|||||||
autofixSupport: 'none' | 'default'
|
autofixSupport: 'none' | 'default'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type QueryExtended = Query & {
|
||||||
|
inDefault: boolean
|
||||||
|
inExtended: boolean
|
||||||
|
inAutofix: boolean
|
||||||
|
}
|
||||||
|
|
||||||
const opts = program.opts()
|
const opts = program.opts()
|
||||||
main(
|
main(
|
||||||
{
|
{
|
||||||
@@ -162,8 +168,28 @@ async function main(options: Options, language: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const entries = Object.values(queries)
|
function decorate(query: Query): QueryExtended {
|
||||||
entries.sort((a, b) => a.name.localeCompare(b.name))
|
return {
|
||||||
|
...query,
|
||||||
|
inDefault: query.packs.includes('code-scanning'),
|
||||||
|
inExtended: query.packs.includes('security-extended'),
|
||||||
|
inAutofix: query.autofixSupport === 'default',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const entries = Object.values(queries).map(decorate)
|
||||||
|
|
||||||
|
// Spec: "Queries that are both in Default and Extended should come first,
|
||||||
|
// in alphabetical order. Followed by the queries that are in Extended only."
|
||||||
|
entries.sort((a, b) => {
|
||||||
|
if (a.inDefault && !b.inDefault) return -1
|
||||||
|
else if (!a.inDefault && b.inDefault) return 1
|
||||||
|
|
||||||
|
if (a.inExtended && !b.inExtended) return -1
|
||||||
|
else if (!a.inExtended && b.inExtended) return 1
|
||||||
|
|
||||||
|
return a.name.localeCompare(b.name)
|
||||||
|
})
|
||||||
|
|
||||||
// At the moment, our chosen business logic is that we omit the Autofix
|
// At the moment, our chosen business logic is that we omit the Autofix
|
||||||
// column if there are no queries that support it.
|
// column if there are no queries that support it.
|
||||||
@@ -174,7 +200,7 @@ async function main(options: Options, language: string) {
|
|||||||
printQueries(options, entries, includeAutofix)
|
printQueries(options, entries, includeAutofix)
|
||||||
}
|
}
|
||||||
|
|
||||||
function printQueries(options: Options, queries: Query[], includeAutofix: boolean) {
|
function printQueries(options: Options, queries: QueryExtended[], includeAutofix: boolean) {
|
||||||
const markdown = []
|
const markdown = []
|
||||||
markdown.push('{% rowheaders %}')
|
markdown.push('{% rowheaders %}')
|
||||||
markdown.push('') // blank line
|
markdown.push('') // blank line
|
||||||
@@ -190,18 +216,9 @@ function printQueries(options: Options, queries: Query[], includeAutofix: boolea
|
|||||||
|
|
||||||
for (const query of queries) {
|
for (const query of queries) {
|
||||||
const markdownLink = `[${query.name}](${query.url})`
|
const markdownLink = `[${query.name}](${query.url})`
|
||||||
let defaultIcon = notIncludedOcticon
|
const defaultIcon = query.inDefault ? includedOcticon : notIncludedOcticon
|
||||||
let extendedIcon = notIncludedOcticon
|
const extendedIcon = query.inExtended ? includedOcticon : notIncludedOcticon
|
||||||
let autofixIcon = notIncludedOcticon
|
const autofixIcon = query.inAutofix ? includedOcticon : notIncludedOcticon
|
||||||
if (query.packs.includes('code-scanning')) {
|
|
||||||
defaultIcon = includedOcticon
|
|
||||||
}
|
|
||||||
if (query.packs.includes('security-extended')) {
|
|
||||||
extendedIcon = includedOcticon
|
|
||||||
}
|
|
||||||
if (query.autofixSupport === 'default') {
|
|
||||||
autofixIcon = includedOcticon
|
|
||||||
}
|
|
||||||
const row = [markdownLink, query.cwes.join(', '), defaultIcon, extendedIcon]
|
const row = [markdownLink, query.cwes.join(', '), defaultIcon, extendedIcon]
|
||||||
if (includeAutofix) {
|
if (includeAutofix) {
|
||||||
row.push(autofixIcon)
|
row.push(autofixIcon)
|
||||||
|
|||||||
@@ -266,13 +266,17 @@ async function translateTree(dir, langObj, enTree) {
|
|||||||
*
|
*
|
||||||
* Order of languages and versions doesn't matter, but order of child page arrays DOES matter (for navigation).
|
* Order of languages and versions doesn't matter, but order of child page arrays DOES matter (for navigation).
|
||||||
*/
|
*/
|
||||||
export async function loadSiteTree(unversionedTree) {
|
export async function loadSiteTree(unversionedTree, languagesOnly = []) {
|
||||||
const rawTree = Object.assign({}, unversionedTree || (await loadUnversionedTree()))
|
const rawTree = Object.assign({}, unversionedTree || (await loadUnversionedTree(languagesOnly)))
|
||||||
const siteTree = {}
|
const siteTree = {}
|
||||||
|
|
||||||
|
const langCodes = (languagesOnly.length && languagesOnly) || Object.keys(languages)
|
||||||
// For every language...
|
// For every language...
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
Object.keys(languages).map(async (langCode) => {
|
langCodes.map(async (langCode) => {
|
||||||
|
if (!(langCode in rawTree)) {
|
||||||
|
throw new Error(`No tree for language ${langCode}`)
|
||||||
|
}
|
||||||
const treePerVersion = {}
|
const treePerVersion = {}
|
||||||
// in every version...
|
// in every version...
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
@@ -329,8 +333,12 @@ export async function loadPageList(unversionedTree, languagesOnly = []) {
|
|||||||
const rawTree = unversionedTree || (await loadUnversionedTree(languagesOnly))
|
const rawTree = unversionedTree || (await loadUnversionedTree(languagesOnly))
|
||||||
const pageList = []
|
const pageList = []
|
||||||
|
|
||||||
|
const langCodes = (languagesOnly.length && languagesOnly) || Object.keys(languages)
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
((languagesOnly.length && languagesOnly) || Object.keys(languages)).map(async (langCode) => {
|
langCodes.map(async (langCode) => {
|
||||||
|
if (!(langCode in rawTree)) {
|
||||||
|
throw new Error(`No tree for language ${langCode}`)
|
||||||
|
}
|
||||||
await addToCollection(rawTree[langCode], pageList)
|
await addToCollection(rawTree[langCode], pageList)
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -26,8 +26,8 @@ async function warmServer(languagesOnly = []) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const unversionedTree = await dog.loadUnversionedTree(languagesOnly)
|
const unversionedTree = await dog.loadUnversionedTree(languagesOnly)
|
||||||
const siteTree = await dog.loadSiteTree(unversionedTree)
|
const siteTree = await dog.loadSiteTree(unversionedTree, languagesOnly)
|
||||||
const pageList = await dog.loadPages(unversionedTree)
|
const pageList = await dog.loadPages(unversionedTree, languagesOnly)
|
||||||
const pageMap = await dog.loadPageMap(pageList)
|
const pageMap = await dog.loadPageMap(pageList)
|
||||||
const redirects = await dog.loadRedirects(pageList)
|
const redirects = await dog.loadRedirects(pageList)
|
||||||
|
|
||||||
@@ -52,12 +52,12 @@ dog.warmServer = statsd.asyncTimer(warmServer, 'warm_server')
|
|||||||
|
|
||||||
// We only want statistics if the priming needs to occur, so let's wrap the
|
// We only want statistics if the priming needs to occur, so let's wrap the
|
||||||
// real method and return early [without statistics] whenever possible
|
// real method and return early [without statistics] whenever possible
|
||||||
export default async function warmServerWrapper() {
|
export default async function warmServerWrapper(languagesOnly = []) {
|
||||||
// Handle receiving multiple calls to this method from multiple page requests
|
// Handle receiving multiple calls to this method from multiple page requests
|
||||||
// by holding the in-progress Promise and returning it instead of allowing
|
// by holding the in-progress Promise and returning it instead of allowing
|
||||||
// the server to actually load all of the files multiple times.
|
// the server to actually load all of the files multiple times.
|
||||||
if (!promisedWarmServer) {
|
if (!promisedWarmServer) {
|
||||||
promisedWarmServer = dog.warmServer()
|
promisedWarmServer = dog.warmServer(languagesOnly)
|
||||||
}
|
}
|
||||||
return promisedWarmServer
|
return promisedWarmServer
|
||||||
}
|
}
|
||||||
|
|||||||
168
src/languages/scripts/count-translation-corruptions.ts
Normal file
168
src/languages/scripts/count-translation-corruptions.ts
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
import path from 'path'
|
||||||
|
import fs from 'fs'
|
||||||
|
|
||||||
|
import { program } from 'commander'
|
||||||
|
import chalk from 'chalk'
|
||||||
|
import { TokenizationError } from 'liquidjs'
|
||||||
|
import walk from 'walk-sync'
|
||||||
|
|
||||||
|
import { getLiquidTokens } from '@/content-linter/lib/helpers/liquid-utils.js'
|
||||||
|
import languages from '@/languages/lib/languages.js'
|
||||||
|
import warmServer, { type Site } from '@/frame/lib/warm-server.js'
|
||||||
|
import { correctTranslatedContentStrings } from '@/languages/lib/correct-translation-content.js'
|
||||||
|
|
||||||
|
// CLI entry point: the optional, variadic `language` argument is handed to
// main() as an array of language codes.
program
  .description('Tally the number of liquid corruptions in a translation')
  .argument('[language...]', 'language(s) to compare against')
  .action(main)
  .parse(process.argv)
|
||||||
|
|
||||||
|
/**
 * Shape this script assumes for the entries of `site.pageList`.
 * `shortTitle` is the only optional field.
 */
interface Page {
  relativePath: string
  fullPath: string
  title: string
  shortTitle?: string
  intro: string
  markdown: string
  languageCode: string
}
|
||||||
|
|
||||||
|
type Reusables = Map<string, string>
|
||||||
|
|
||||||
|
/**
 * Tally Liquid corruptions for each requested translation and print a
 * grand total across all of them.
 *
 * @param languageCodes Language codes given on the command line. When empty,
 *   every language in `languages` except 'en' is checked.
 *
 * Exits the process with status 1 if any requested code is unknown or is 'en'.
 */
async function main(languageCodes: string[]) {
  const knownCodes = Object.keys(languages)
  const langCodes = languageCodes.length
    ? languageCodes
    : knownCodes.filter((code) => code !== 'en')

  // Validate every requested code *before* the expensive warm-up, so a bad
  // argument fails fast with a readable message instead of surfacing from
  // inside warmServer() or after other languages were already reported.
  for (const languageCode of langCodes) {
    if (!knownCodes.includes(languageCode)) {
      console.error(chalk.red(`Language ${languageCode} not found`))
      return process.exit(1)
    }
    if (languageCode === 'en') {
      console.error(chalk.red("Can't test in English ('en')"))
      return process.exit(1)
    }
  }

  // An empty list means "no restriction"; otherwise English is loaded
  // alongside the requested languages.
  const site = await warmServer(languageCodes.length ? ['en', ...langCodes] : [])

  // When checking reusables, we only want to check the files that
  // have an English equivalent.
  const reusables = getReusables()

  const totalErrors = new Map<string, number>()
  for (const languageCode of langCodes) {
    const { errors } = run(languageCode, site, reusables)
    for (const [error, count] of Array.from(errors.entries())) {
      totalErrors.set(error, (totalErrors.get(error) || 0) + count)
    }
  }

  const sumTotal = Array.from(totalErrors.values()).reduce((acc, count) => acc + count, 0)
  console.log('\nGRAND TOTAL ERRORS:', sumTotal)
}
|
||||||
|
|
||||||
|
/**
 * Read every Markdown file under `data/reusables` (README.md files are
 * ignored) into memory.
 *
 * @returns A map from each file's path, including the `data/reusables`
 *   base path, to its UTF-8 content.
 */
function getReusables(): Reusables {
  const markdownPaths = walk('data/reusables', {
    includeBasePath: true,
    globs: ['**/*.md'],
    ignore: ['**/README.md'],
  })
  return new Map(
    markdownPaths.map((filePath): [string, string] => [
      filePath,
      fs.readFileSync(filePath, 'utf8'),
    ]),
  )
}
|
||||||
|
|
||||||
|
/**
 * Tally Liquid tokenization errors for one translated language and print a
 * report (most common errors, where they occur, most common illegal tags).
 *
 * @param languageCode Key into `languages` for the translation to check.
 * @param site Warmed site; only `site.pageList` is read here.
 * @param englishReusables English reusable paths mapped to their content.
 *   Each path is joined onto the language's `dir` to locate the translated
 *   file; reusables without a translated file are skipped.
 * @returns `{ errors }`: a map from error message to number of occurrences.
 */
function run(languageCode: string, site: Site, englishReusables: Reusables) {
  const PADDING = 60
  const language = languages[languageCode as keyof typeof languages]

  console.log(`--- Tallying liquid corruptions in ${languageCode} (${language.name}) ---`)

  const pageList: Page[] = site.pageList
  const errors = new Map<string, number>()
  const wheres = new Map<string, number>()
  const illegalTags = new Map<string, number>()

  const increment = (tally: Map<string, number>, key: string) => {
    tally.set(key, (tally.get(key) || 0) + 1)
  }

  // Entries of a tally, most frequent first.
  const ranked = (tally: Map<string, number>) =>
    Array.from(tally.entries()).sort((a, b) => b[1] - a[1])

  // Print one numbered, padded line per [label, count] pair.
  const printRanked = (rows: [string, number][]) => {
    rows.forEach(([label, count], i) => {
      console.log(`${i + 1}.`.padEnd(3), label.padEnd(PADDING), count)
    })
  }

  function countError(error: TokenizationError, where: string) {
    // `originalError` and `token` are not part of the public typings, hence
    // the casts. Fall back to the error's own message if `originalError` is
    // absent rather than crashing the whole tally.
    const original = (error as any).originalError
    const errorString: string = original?.message ?? error.message
    if (errorString.includes('illegal tag syntax')) {
      increment(illegalTags, (error as any).token.content)
    }
    increment(errors, errorString)
    increment(wheres, where)
  }

  for (const page of pageList) {
    if (page.languageCode !== languageCode) continue

    // Only non-empty strings are worth tokenizing.
    const strings: string[][] = [
      ['title', page.title],
      ['shortTitle', page.shortTitle || ''],
      ['intro', page.intro || ''],
      ['markdown', page.markdown],
    ].filter(([, string]) => Boolean(string))

    for (const [where, string] of strings) {
      try {
        getLiquidTokens(string)
      } catch (error) {
        if (!(error instanceof TokenizationError)) throw error
        countError(error, where)
      }
    }
  }

  for (const [relativePath, englishContent] of Array.from(englishReusables.entries())) {
    const filePath = path.join(language.dir, relativePath)

    let rawContent: string
    try {
      rawContent = fs.readFileSync(filePath, 'utf8')
    } catch (error) {
      // A reusable that has no translated file is simply skipped. Check the
      // error code (not the message text) and only for the read itself, so
      // unrelated failures further down are never silently swallowed.
      if ((error as { code?: unknown } | null)?.code === 'ENOENT') continue
      throw error
    }

    try {
      const correctedContent = correctTranslatedContentStrings(rawContent, englishContent, {
        code: languageCode,
        relativePath,
      })
      getLiquidTokens(correctedContent)
    } catch (error) {
      if (!(error instanceof TokenizationError)) throw error
      countError(error, 'reusable')
    }
  }

  const flat = ranked(errors)
  const sumTotal = flat.reduce((acc, [, count]) => acc + count, 0)

  console.log('\nMost common errors')
  printRanked(flat)
  console.log(`${'TOTAL:'.padEnd(3 + 1 + PADDING)}`, sumTotal)

  if (sumTotal) {
    console.log('\nMost common places')
    printRanked(ranked(wheres))

    const illegalTagsFlat = ranked(illegalTags)
    // Every recorded tag has a count of at least 1, so non-empty means non-zero.
    if (illegalTagsFlat.length) {
      console.log('\nMost common illegal tags', illegalTagsFlat.length > 10 ? ' (Top 10)' : '')
      printRanked(illegalTagsFlat.slice(0, 10))
    }
  }
  console.log('\n')

  return { errors }
}
|
||||||
Reference in New Issue
Block a user