From a1d93a7619bf6d5216f60dbb62c8fd850bcc7b6a Mon Sep 17 00:00:00 2001 From: Vanessa Yuen <6842965+vanessayuenn@users.noreply.github.com> Date: Mon, 1 Feb 2021 17:59:33 +0100 Subject: [PATCH] Lint translation files (#17561) ### Why: A lot of content gets mistranslated, some common patterns are: frontmatter date and enums getting translated, liquid tags get translated or go missing during the translation process. These translation errors are tough to catch, especially when they often come in huge PRs. ### What's being changed: - Frontmatter is also getting linted against schema as part of `lint-files` - When an environment variable `TEST_TRANSLATION` is passed, - `lint-files` will run its tests against all files that have been newly translated (by git-diffing between `translations` branch and `main` branch), and - results are outputted using a custom jest reporter. The output is formatted in a way that makes it easy to exclude the problematic translated files from being merged, and to share the errors with [localization support](https://github.com/github/localization-support/issues/489). - Run the implemented translation test as part of the existing `Node.js Tests - Translations` workflow --- .github/workflows/test-translations.yml | 6 + jest.config.js | 14 +- package.json | 1 + tests/content/lint-files.js | 160 ++++++++++++++------- tests/helpers/lint-translation-reporter.js | 43 ++++++ 5 files changed, 170 insertions(+), 54 deletions(-) create mode 100644 tests/helpers/lint-translation-reporter.js diff --git a/.github/workflows/test-translations.yml b/.github/workflows/test-translations.yml index a452e63a69..9c84db7802 100644 --- a/.github/workflows/test-translations.yml +++ b/.github/workflows/test-translations.yml @@ -17,6 +17,9 @@ jobs: with: ref: translations # check out the 'translations' branch + - name: Check out tip of main + run: git fetch --depth=1 origin main + - name: Setup node uses: actions/setup-node@c46424eee26de4078d34105d3de3cc4992202b1e with: @@ -41,6 +44,9 @@ jobs: - name: Run linter run: npx eslint . + - name: Lint translated content + run: npm run lint-translation + - name: Check dependencies run: npm run check-deps diff --git a/jest.config.js b/jest.config.js index 6b22ede91a..16a291fe73 100644 --- a/jest.config.js +++ b/jest.config.js @@ -2,6 +2,16 @@ const isBrowser = process.env.BROWSER const isActions = Boolean(process.env.GITHUB_ACTIONS) +const testTranslation = Boolean(process.env.TEST_TRANSLATION) + +let reporters = ['default'] + +if (testTranslation) { + // only use custom reporter if we are linting translations + reporters = ['/tests/helpers/lint-translation-reporter.js'] +} else if (isActions) { + reporters.push('jest-github-actions-reporter') +} module.exports = { coverageThreshold: { @@ -15,9 +25,7 @@ module.exports = { preset: isBrowser ? 'jest-puppeteer' : undefined, - reporters: isActions - ? ['default', 'jest-github-actions-reporter'] - : ['default'], + reporters, modulePathIgnorePatterns: [ 'assets/' ], diff --git a/package.json b/package.json index 30de3b171c..c3031c3036 100644 --- a/package.json +++ b/package.json @@ -170,6 +170,7 @@ "build": "cross-env NODE_ENV=production npx webpack --mode production", "start-all-languages": "cross-env NODE_ENV=development nodemon server.js", "lint": "eslint --fix . && prettier -w \"**/*.{yml,yaml}\"", + "lint-translation": "TEST_TRANSLATION=true jest content/lint-files", "test": "jest && eslint . && prettier -c \"**/*.{yml,yaml}\" && npm run check-deps", "prebrowser-test": "npm run build", "browser-test": "start-server-and-test browser-test-server 4001 browser-test-tests", diff --git a/tests/content/lint-files.js b/tests/content/lint-files.js index e58a587c03..074d8749b9 100644 --- a/tests/content/lint-files.js +++ b/tests/content/lint-files.js @@ -2,7 +2,7 @@ const path = require('path') const slash = require('slash') const fs = require('fs') const walk = require('walk-sync') -const { zip } = require('lodash') +const { zip, groupBy } = require('lodash') const yaml = require('js-yaml') const revalidator = require('revalidator') const generateMarkdownAST = require('mdast-util-from-markdown') @@ -12,12 +12,14 @@ const languages = require('../../lib/languages') const { tags } = require('../../lib/liquid-tags/extended-markdown') const ghesReleaseNotesSchema = require('../../lib/release-notes-schema') const renderContent = require('../../lib/render-content') +const { execSync } = require('child_process') const rootDir = path.join(__dirname, '../..') const contentDir = path.join(rootDir, 'content') const reusablesDir = path.join(rootDir, 'data/reusables') const variablesDir = path.join(rootDir, 'data/variables') const glossariesDir = path.join(rootDir, 'data/glossaries') +const ghesReleaseNotesDir = path.join(rootDir, 'data/release-notes') const languageCodes = Object.keys(languages) @@ -149,13 +151,26 @@ const oldVariableErrorText = 'Found article uses old {{ site.data... }} syntax. const oldOcticonErrorText = 'Found octicon variables with the old {{ octicon-name }} syntax. Use {% octicon "name" %} instead!' const oldExtendedMarkdownErrorText = 'Found extended markdown tags with the old {{#note}} syntax. Use {% note %}/{% endnote %} instead!' -describe('lint-files', () => { - const mdWalkOptions = { - globs: ['**/*.md'], - ignore: ['**/README.md'], - directories: false, - includeBasePath: true - } +const mdWalkOptions = { + globs: ['**/*.md'], + ignore: ['**/README.md'], + directories: false, + includeBasePath: true +} + +// Also test the "data/variables/" YAML files + +const yamlWalkOptions = { + globs: ['**/*.yml'], + directories: false, + includeBasePath: true +} + +// different lint rules apply to different content types +let mdToLint, ymlToLint, releaseNotesToLint + +if (!process.env.TEST_TRANSLATION) { + // compile lists of all the files we want to lint const contentMarkdownAbsPaths = walk(contentDir, mdWalkOptions).sort() const contentMarkdownRelPaths = contentMarkdownAbsPaths.map(p => slash(path.relative(rootDir, p))) @@ -165,16 +180,81 @@ describe('lint-files', () => { const reusableMarkdownRelPaths = reusableMarkdownAbsPaths.map(p => slash(path.relative(rootDir, p))) const reusableMarkdownTuples = zip(reusableMarkdownRelPaths, reusableMarkdownAbsPaths) - describe.each([...contentMarkdownTuples, ...reusableMarkdownTuples])( - 'in "%s"', + mdToLint = [...contentMarkdownTuples, ...reusableMarkdownTuples] + + // data/variables + const variableYamlAbsPaths = walk(variablesDir, yamlWalkOptions).sort() + const variableYamlRelPaths = variableYamlAbsPaths.map(p => slash(path.relative(rootDir, p))) + const variableYamlTuples = zip(variableYamlRelPaths, variableYamlAbsPaths) + + // data/glossaries + const glossariesYamlAbsPaths = walk(glossariesDir, yamlWalkOptions).sort() + const glossariesYamlRelPaths = glossariesYamlAbsPaths.map(p => slash(path.relative(rootDir, p))) + const glossariesYamlTuples = zip(glossariesYamlRelPaths, glossariesYamlAbsPaths) + + ymlToLint = [...variableYamlTuples, ...glossariesYamlTuples] + + // GHES release notes + const ghesReleaseNotesYamlAbsPaths = walk(ghesReleaseNotesDir, yamlWalkOptions).sort() + const ghesReleaseNotesYamlRelPaths = ghesReleaseNotesYamlAbsPaths.map(p => path.relative(rootDir, p)) + releaseNotesToLint = zip(ghesReleaseNotesYamlRelPaths, ghesReleaseNotesYamlAbsPaths) +} else { + console.log('testing translations.') + + // get all translated markdown or yaml files by comparing files changed to main branch + const changedFilesRelPaths = execSync('git diff --name-only origin/main | egrep "^translations/.*/.+.(yml|md)$"').toString().split('\n') + console.log(`Found ${changedFilesRelPaths.length} translated files.`) + + const { mdRelPaths, ymlRelPaths, releaseNotesRelPaths } = groupBy(changedFilesRelPaths, (path) => { + // separate the changed files to different groups + if (path.endsWith('README.md')) { + return 'throwAway' + } else if (path.endsWith('.md')) { + return 'mdRelPaths' + } else if (path.match(/\/data\/(variables|glossaries)\//i)) { + return 'ymlRelPaths' + } else if (path.match(/\/data\/release-notes\//i)) { + return 'releaseNotesRelPaths' + } else { + // we aren't linting the rest + return 'throwAway' + } + }) + + const [mdTuples, ymlTuples, releaseNotesTuples] = [mdRelPaths, ymlRelPaths, releaseNotesRelPaths].map(relPaths => { + const absPaths = relPaths.map(p => path.join(rootDir, p)) + return zip(relPaths, absPaths) + }) + + mdToLint = mdTuples + ymlToLint = ymlTuples + releaseNotesToLint = releaseNotesTuples +} + +function formatLinkError (message, links) { + return `${message}\n - ${links.join('\n - ')}` +} + +// Returns `content` if its a string, or `content.description` if it can. +// Used for getting the nested `description` key in glossary files. +function getContent (content) { + if (typeof content === 'string') return content + if (typeof content.description === 'string') return content.description + return null +} + +describe('lint markdown content', () => { + describe.each(mdToLint)( + '%s', (markdownRelPath, markdownAbsPath) => { - let content, ast, links, isHidden, isEarlyAccess, isSitePolicy + let content, ast, links, isHidden, isEarlyAccess, isSitePolicy, frontmatterErrors beforeAll(async () => { const fileContents = await fs.promises.readFile(markdownAbsPath, 'utf8') - const { data, content: bodyContent } = frontmatter(fileContents) + const { data, content: bodyContent, errors } = frontmatter(fileContents) content = bodyContent + frontmatterErrors = errors ast = generateMarkdownAST(content) isHidden = data.hidden === true isEarlyAccess = markdownRelPath.split('/').includes('early-access') @@ -307,34 +387,20 @@ describe('lint-files', () => { .resolves .toBeTruthy() }) + + if (!markdownRelPath.includes('data/reusables')) { + test('contains valid frontmatter', () => { + const errorMessage = frontmatterErrors.map(error => `- [${error.property}]: ${error.actual}, ${error.message}`).join('\n') + expect(frontmatterErrors.length, errorMessage).toBe(0) + }) + } } ) +}) - // Also test the "data/variables/" YAML files - const yamlWalkOptions = { - globs: ['**/*.yml'], - directories: false, - includeBasePath: true - } - - const variableYamlAbsPaths = walk(variablesDir, yamlWalkOptions).sort() - const variableYamlRelPaths = variableYamlAbsPaths.map(p => slash(path.relative(rootDir, p))) - const variableYamlTuples = zip(variableYamlRelPaths, variableYamlAbsPaths) - - const glossariesYamlAbsPaths = walk(glossariesDir, yamlWalkOptions).sort() - const glossariesYamlRelPaths = glossariesYamlAbsPaths.map(p => slash(path.relative(rootDir, p))) - const glossariesYamlTuples = zip(glossariesYamlRelPaths, glossariesYamlAbsPaths) - - // Returns `content` if its a string, or `content.description` if it can. - // Used for getting the nested `description` key in glossary files. - function getContent (content) { - if (typeof content === 'string') return content - if (typeof content.description === 'string') return content.description - return null - } - - describe.each([...variableYamlTuples, ...glossariesYamlTuples])( - 'in "%s"', +describe('lint yaml content', () => { + describe.each(ymlToLint)( + '%s', (yamlRelPath, yamlAbsPath) => { let dictionary, isEarlyAccess @@ -518,16 +584,12 @@ describe('lint-files', () => { }) } ) +}) - // GHES release notes - const ghesReleaseNotesDir = path.join(__dirname, '../../data/release-notes') - const ghesReleaseNotesYamlAbsPaths = walk(ghesReleaseNotesDir, yamlWalkOptions).sort() - const ghesReleaseNotesYamlRelPaths = ghesReleaseNotesYamlAbsPaths.map(p => path.relative(rootDir, p)) - const ghesReleaseNotesYamlTuples = zip(ghesReleaseNotesYamlRelPaths, ghesReleaseNotesYamlAbsPaths) - - if (ghesReleaseNotesYamlTuples.length > 0) { - describe.each(ghesReleaseNotesYamlTuples)( - 'in "%s"', +describe('lint release notes', () => { + if (releaseNotesToLint.length > 0) { + describe.each(releaseNotesToLint)( + '%s', (yamlRelPath, yamlAbsPath) => { let dictionary @@ -538,14 +600,10 @@ describe('lint-files', () => { it('matches the schema', () => { const { errors } = revalidator.validate(dictionary, ghesReleaseNotesSchema) - const errorMessage = errors.map(error => `- [${error.property}]: ${error.attribute}, ${error.message}`).join('\n') + const errorMessage = errors.map(error => `- [${error.property}]: ${error.actual}, ${error.message}`).join('\n') expect(errors.length, errorMessage).toBe(0) }) } ) } }) - -function formatLinkError (message, links) { - return `${message}\n - ${links.join('\n - ')}` -} diff --git a/tests/helpers/lint-translation-reporter.js b/tests/helpers/lint-translation-reporter.js new file mode 100644 index 0000000000..71325c913e --- /dev/null +++ b/tests/helpers/lint-translation-reporter.js @@ -0,0 +1,43 @@ +const chalk = require('chalk') +const { groupBy } = require('lodash') + +// we don't want to print all the stack traces +const stackTrackRegExp = /^\s+at\s.+/i + +class TranslationReporter { + constructor (globalConfig, options) { + this._globalConfig = globalConfig + this._options = options + } + + onRunComplete (contexts, results) { + const failures = results.testResults.reduce((fails, { testResults: assertionResults }) => { + const formattedFails = assertionResults + .filter(result => result.status === 'failed') + .map(({ ancestorTitles, failureMessages, title }) => { + return { + fileName: ancestorTitles[1], + failedTests: title, + failureMessage: failureMessages.map((message) => message.split('\n').filter(line => !stackTrackRegExp.test(line)).join('\n')) + } + }) + return [...fails, ...formattedFails] + }, []) + + const failuresByFile = groupBy(failures, 'fileName') + + for (const fileName in failuresByFile) { + console.group(chalk.red.bold(`\n${fileName}`)) + failuresByFile[fileName].forEach(({ failureMessage }, index) => { + console.log(chalk.bold(`\n(${index + 1})`)) + failureMessage.forEach(msg => console.log(msg)) + }) + console.groupEnd() + } + + console.log(chalk.bold('\nthese files should not be included: ')) + console.dir(Object.keys(failuresByFile), { maxArrayLength: null }) + } +} + +module.exports = TranslationReporter