132
.github/workflows/translation-health-report.yml
vendored
Normal file
132
.github/workflows/translation-health-report.yml
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
name: Translation health report

# **What it does**: Provides errors and summary statistics on rendering translated content.
# **Why we have it**: To improve our translations by having clearer visibility.
# **Who does it impact**: Docs engineering, Microsoft translators.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST

permissions:
  contents: read

jobs:
  create-translation-health-report:
    name: Create translation health report
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    # This sets a maximum execution time of 300 minutes (5 hours)
    # to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing must not cancel the others; languages run one at a time.
      fail-fast: false
      max-parallel: 1
      matrix:
        include:
          - language: es
            language_dir: translations/es-ES
            language_repo: github/docs-internal.es-es

          - language: ja
            language_dir: translations/ja-JP
            language_repo: github/docs-internal.ja-jp

          - language: pt
            language_dir: translations/pt-BR
            language_repo: github/docs-internal.pt-br

          - language: cn
            language_dir: translations/zh-CN
            language_repo: github/docs-internal.zh-cn

          # We'll be ready to add the following languages in a future effort.

          # - language: ru
          #   language_dir: translations/ru-RU
          #   language_repo: github/docs-internal.ru-ru

          # - language: ko
          #   language_dir: translations/ko-KR
          #   language_repo: github/docs-internal.ko-kr

          # - language: fr
          #   language_dir: translations/fr-FR
          #   language_repo: github/docs-internal.fr-fr

          # - language: de
          #   language_dir: translations/de-DE
          #   language_repo: github/docs-internal.de-de

    steps:
      - name: Checkout the docs-internal repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      # Drop the checked-in copy of this language so that the language repo
      # checked out below provides the content and data directories instead.
      - name: Remove all language translations
        run: |
          git rm -rf --quiet ${{ matrix.language_dir }}/content
          git rm -rf --quiet ${{ matrix.language_dir }}/data

      - name: Checkout the language-specific repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: ${{ matrix.language_dir }}

      # Record the language repo's short commit SHA for the report's --gitref option.
      - name: Get language SHA
        run: |
          gitref=$(cd ${{ matrix.language_dir }} && git rev-parse --short HEAD)
          echo "gitref=$gitref" >> "$GITHUB_ENV"

      - name: 'Setup node'
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'

      - name: npm ci
        run: npm ci

      # `jq -Rsa .` encodes the script's whole output as one single-line,
      # ASCII-only JSON string, so it fits in a single GITHUB_ENV entry.
      - name: Create translation health report
        run: |
          translation_health_report=$( \
            node script/i18n/create-translation-health-report.js \
              --language ${{ matrix.language }} \
              --gitref ${{ env.gitref }} \
              | jq -Rsa .
          )
          echo "translation_health_report=$translation_health_report" >> "$GITHUB_ENV"

      - name: Log in to Azure
        uses: azure/login@1f63701bf3e6892515f1b7ce2d2bf1708b46beaf
        with:
          creds: ${{ secrets.PROD_AZURE_CREDENTIALS }}

      # The report is double-quoted so the shell hands it to --data as a single
      # argument; unquoted, it would be split on the spaces it contains.
      # `--overwrite true` replaces an existing blob of the same name, which the
      # daily "-latest" upload (and a same-day re-run of the dated one) needs.
      - name: Upload to Azure blob storage
        uses: azure/CLI@61bb69d64d613b52663984bf12d6bac8fd7b3cc8
        with:
          inlineScript: |
            az storage blob upload \
              --name "${{ matrix.language }}-latest.json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports \
              --overwrite true
            az storage blob upload \
              --name "${{ matrix.language }}-$(date +%Y-%m-%d).json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports \
              --overwrite true

      # Runs even when an earlier step failed.
      - name: Log out from Azure
        if: always()
        run: |
          az logout

      # Emit a notification for the first responder to triage if the workflow failed.
      - name: Send Slack notification if workflow failed
        uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
        if: failure()
        with:
          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
          color: failure
          text: 'The health report for ${{ matrix.language }} failed.'
|
||||
173
.github/workflows/wip-langs-create-translation-batch-pr.yml
vendored
Normal file
173
.github/workflows/wip-langs-create-translation-batch-pr.yml
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
name: WIP Languages Create translation Batch Pull Request

# **What it does**:
#  - Creates one pull request per WIP language after running a series of automated checks,
#    removing translations that are broken in any known way
# **Why we have it**:
#  - To test the translation pipeline for WIP languages
# **Who does it impact**: Helps test how WIP languages will behave in CI

on:
  workflow_dispatch:

permissions:
  contents: write

jobs:
  create-translation-batch-for-wip-langs:
    name: Create translation batch for WIP languages
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    # A sync's average run time is ~3.2 hours.
    # This sets a maximum execution time of 300 minutes (5 hours) to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing must not cancel the others; languages run one at a time.
      fail-fast: false
      max-parallel: 1
      matrix:
        include:
          - language: ru
            language_dir: translations/ru-RU
            language_repo: github/docs-internal.ru-ru

          - language: ko
            language_dir: translations/ko-KR
            language_repo: github/docs-internal.ko-kr

          - language: fr
            language_dir: translations/fr-FR
            language_repo: github/docs-internal.fr-fr

          - language: de
            language_dir: translations/de-DE
            language_repo: github/docs-internal.de-de

    steps:
      # The step output is written to the $GITHUB_OUTPUT file; the older
      # `::set-output` workflow command is deprecated.
      - name: Set branch name
        id: set-branch
        run: |
          echo "BRANCH_NAME=msft-translation-batch-${{ matrix.language }}-$(date +%Y-%m-%d__%H-%M)" >> "$GITHUB_OUTPUT"

      - run: git config --global user.name "docubot"
      - run: git config --global user.email "67483024+docubot@users.noreply.github.com"

      - name: Checkout the docs-internal repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          fetch-depth: 0
          lfs: true

      - name: Create a branch for the current language
        run: git checkout -b ${{ steps.set-branch.outputs.BRANCH_NAME }}

      - name: Remove unwanted git hooks
        run: rm .git/hooks/post-checkout

      # Drop the checked-in copy of this language so that the language repo
      # checked out below provides the content and data directories instead.
      - name: Remove all language translations
        run: |
          git rm -rf --quiet ${{ matrix.language_dir }}/content
          git rm -rf --quiet ${{ matrix.language_dir }}/data

      - name: Checkout the language-specific repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: ${{ matrix.language_dir }}

      # Without its own .git directory, the language files can be added to
      # this repository's index as ordinary files in the next step.
      - name: Remove .git from the language-specific repo
        run: rm -rf ${{ matrix.language_dir }}/.git

      - name: Commit translated files
        run: |
          git add ${{ matrix.language_dir }}
          git commit -m "Add translations" || echo "Nothing to commit"

      - name: 'Setup node'
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'

      - run: npm ci

      - name: Homogenize frontmatter
        run: |
          node script/i18n/homogenize-frontmatter.js
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/homogenize-frontmatter.js" || echo "Nothing to commit"

      - name: Fix translation errors
        run: |
          node script/i18n/fix-translation-errors.js
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/fix-translation-errors.js" || echo "Nothing to commit"

      # The script output is appended to /tmp/batch.log, which the report steps below read.
      - name: Check rendering
        run: |
          node script/i18n/lint-translation-files.js --check rendering | tee -a /tmp/batch.log | cat
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/lint-translation-files.js --check rendering" || echo "Nothing to commit"

      - name: Reset files with broken liquid tags
        run: |
          node script/i18n/msft-reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }} | tee -a /tmp/batch.log | cat
          git add ${{ matrix.language_dir }} && git commit -m "run script/i18n/msft-reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }}" || echo "Nothing to commit"

      - name: Check in CSV report
        run: |
          mkdir -p translations/log
          csvFile=translations/log/msft-${{ matrix.language }}-resets.csv
          script/i18n/msft-report-reset-files.js --report-type=csv --language=${{ matrix.language }} --log-file=/tmp/batch.log > $csvFile
          git add -f $csvFile && git commit -m "Check in ${{ matrix.language }} CSV report" || echo "Nothing to commit"

      - name: Write the reported files that were reset to /tmp/pr-body.txt
        run: script/i18n/msft-report-reset-files.js --report-type=pull-request-body --language=${{ matrix.language }} --log-file=/tmp/batch.log --csv-path=${{ steps.set-branch.outputs.BRANCH_NAME }}/translations/log/msft-${{ matrix.language }}-resets.csv > /tmp/pr-body.txt

      - name: Push filtered translations
        run: git push origin ${{ steps.set-branch.outputs.BRANCH_NAME }}

      - name: Close existing stale batches
        uses: lee-dohm/close-matching-issues@e9e43aad2fa6f06a058cedfd8fb975fd93b56d8f
        with:
          token: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
          query: 'type:pr label:translation-batch-${{ matrix.language }}'

      - name: Create translation batch pull request
        env:
          GITHUB_TOKEN: ${{ secrets.DOCUBOT_REPO_PAT }}
          TITLE: '[DO NOT MERGE - WIP Language test]: New translation batch for ${{ matrix.language }}'
          BASE: 'main'
          HEAD: ${{ steps.set-branch.outputs.BRANCH_NAME }}
          LANGUAGE: ${{ matrix.language }}
          BODY_FILE: '/tmp/pr-body.txt'
        run: .github/actions-scripts/msft-create-translation-batch-pr.js

      # - name: Approve PR
      #   if: github.ref_name == 'main'
      #   env:
      #     GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
      #   run: gh pr review --approve || echo "Nothing to approve"

      # - name: Set auto-merge
      #   if: github.ref_name == 'main'
      #   env:
      #     GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
      #   run: gh pr merge ${{ steps.set-branch.outputs.BRANCH_NAME }} --auto --squash || echo "Nothing to merge"

      # When the maximum execution time is reached for this job, Actions cancels the workflow run.
      # This emits a notification for the first responder to triage.
      # - name: Send Slack notification if workflow is cancelled
      #   uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
      #   if: cancelled()
      #   with:
      #     channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
      #     bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
      #     color: failure
      #     text: 'The new translation batch for ${{ matrix.language }} was cancelled.'

      # Emit a notification for the first responder to triage if the workflow failed.
      # - name: Send Slack notification if workflow failed
      #   uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
      #   if: failure()
      #   with:
      #     channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
      #     bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
      #     color: failure
      #     text: 'The new translation batch for ${{ matrix.language }} failed.'
|
||||
163
script/i18n/create-translation-health-report.js
Executable file
163
script/i18n/create-translation-health-report.js
Executable file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// [start-readme]
|
||||
//
|
||||
// Create a list of errors and summary statistics for errors in a particular language.
|
||||
//
|
||||
// [end-readme]
|
||||
|
||||
/* Nota bene:
|
||||
If you are getting more errors all of a sudden, try running this:
|
||||
$ script/i18n/create-translation-health-report.js -l en -r 000
|
||||
If there are any errors, const context = { ... } probably needs more data.
|
||||
*/
|
||||
|
||||
import { program } from 'commander'
|
||||
import fs from 'fs/promises'
|
||||
import { pick } from 'lodash-es'
|
||||
|
||||
import { loadPages, loadPageMap } from '../../lib/page-data.js'
|
||||
import loadSiteData from '../../lib/site-data.js'
|
||||
import loadRedirects from '../../lib/redirects/precompile.js'
|
||||
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
|
||||
import { languageKeys } from '../../lib/languages.js'
|
||||
import { getProductStringFromPath } from '../../lib/path-utils.js'
|
||||
|
||||
// Command-line interface: the language code and the language repo's short
// commit SHA are both mandatory options.
program.description('Create a translation health report for one language.')
program.requiredOption('-l, --language <language>', 'The language to health check')
program.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
program.parse(process.argv)
|
||||
|
||||
// Gather the popularity data that search uses, so errors can be prioritized.
// Reads lib/search/popular-pages.json one line at a time, parsing each line as
// a JSON row, and resolves to an object mapping `path_article` -> `path_count`.
async function fetchPopularityData() {
  const rawText = await fs.readFile('lib/search/popular-pages.json', 'utf8')
  const countsByPath = {}
  rawText.split('\n').forEach((line) => {
    try {
      const { path_article: article, path_count: count } = JSON.parse(line)
      countsByPath[article] = count
    } catch {
      // Best effort: a line that cannot be parsed into a row (for example a
      // blank line) is skipped.
    }
  })
  return countsByPath
}
|
||||
|
||||
// Render `page` in every version it applies to and gather whatever errors come up.
// Resolves to an object mapping version -> list of trimmed-down errors, or to
// undefined when no applicable version produced an error.
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) {
  const versionsToCheck = allVersionKeys.filter((version) =>
    page.applicableVersions.includes(version)
  )

  // Render a single version. Resolves to a [version, errors] entry, or to
  // undefined when that version rendered without errors.
  const renderVersion = async (version) => {
    const found = []
    try {
      const currentPath = `/${language}/${version}/${plainPath}`
      // Reference middleware/context.js for data shape
      await page.render(
        {
          ...data, // needed for all pages
          currentVersion: version, // needed for all pages
          currentLanguage: language, // needed for all pages
          currentPath, // needed for all pages
          currentVersionObj: allVersions[version], // needed for ifversion tag
          currentProduct: getProductStringFromPath(currentPath), // needed for learning-track on guides pages
          pages: pageMap, // needed for learning-track on guides pages
          redirects, // needed for learning-track on guides pages
        },
        found
      )
    } catch (err) {
      // A thrown error is recorded next to the ones collected during rendering.
      found.push(err)
    }

    if (found.length === 0) return undefined

    // Keep only the properties that give translators the clearest information
    // on the error. Other fields: Object.getOwnPropertyNames(err)
    const trimmed = found.map((err) => pick(err, ['name', 'message', 'token.content']))
    return [version, trimmed]
  }

  // All applicable versions are rendered concurrently.
  const settled = await Promise.all(versionsToCheck.map((version) => renderVersion(version)))
  const entries = settled.filter(Boolean)
  return entries.length > 0 ? Object.fromEntries(entries) : undefined
}
|
||||
|
||||
// Count how many times each error message occurs across all pages and versions.
//
// `errors` is a list of page entries whose `versions` property maps a version
// to a list of error objects. Returns a plain object mapping message -> count.
//
// Counting happens in a Map rather than directly on an object literal so that
// a message equal to an inherited Object.prototype key (e.g. "constructor" or
// "__proto__") is counted like any other message instead of yielding NaN or
// being dropped.
function groupErrors(errors) {
  const counts = new Map()
  for (const page of errors) {
    for (const versionErrors of Object.values(page.versions)) {
      for (const err of versionErrors) {
        // Object keys are always strings, so normalize up front; a missing
        // message is grouped under "undefined".
        const key = String(err.message)
        counts.set(key, (counts.get(key) || 0) + 1)
      }
    }
  }
  // Object.fromEntries defines every key as an own property of the result.
  return Object.fromEntries(counts)
}
|
||||
|
||||
// Build the translation health report for the language given on the command line.
// Throws if the language is not one of `languageKeys`. Resolves to the report
// object: summary counts, per-page errors sorted by popularity, error counts
// grouped by message, and trimmed-down site data errors.
async function createReport() {
  const { language, gitref } = program.opts()

  // Check that the language is valid
  if (!languageKeys.includes(language)) {
    throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
  }

  // Load popularity data to sort errors
  const popularity = await fetchPopularityData()

  // Load all pages
  const allPages = await loadPages()
  const dataErrors = []
  const data = loadSiteData(dataErrors)[language]
  // Keep this language's pages only. Early access pages log to the console,
  // which would show in the report, so they are left out as well.
  const pages = allPages.filter(
    (page) => page.languageCode === language && !page.relativePath.includes('early-access')
  )
  const pageMap = await loadPageMap(pages)
  const redirects = await loadRedirects(pages)

  // Try to render one page; resolves to its report entry, or to undefined
  // when no version of the page produced an error.
  const checkPage = async (page) => {
    const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
    const versions = await collectPageErrors(page, {
      language,
      data,
      redirects,
      plainPath,
      pageMap,
    })
    if (!versions) return undefined
    return {
      path: plainPath,
      popularity: popularity[plainPath] || 0,
      versions,
    }
  }

  // Try to render each page
  const outcomes = await Promise.all(pages.map((page) => checkPage(page)))
  const pageErrors = outcomes.filter(Boolean)
  // Sort by popularity desc so the translators know what to focus on first
  pageErrors.sort((a, b) => b.popularity - a.popularity)

  return {
    language,
    gitref,
    datetime: new Date().toJSON(),
    totalPages: pages.length,
    // totalErrorPages should be around en: 0, es: 1043, ja: 1004, pt: 995, cn: 1063
    totalErrorPages: pageErrors.length,
    pageErrors,
    // To group errors by message instead
    groupedPageErrors: groupErrors(pageErrors),
    // Filter down properties to make it easier for
    // translators to get the clearest information on the error
    dataErrors: dataErrors.map((err) => pick(err, ['name', 'message', 'token.content'])),
  }
}
|
||||
|
||||
// Entry point: build the report and print it to stdout as JSON with 2-space indentation.
const healthReport = await createReport()
console.log(JSON.stringify(healthReport, null, 2))
|
||||
Reference in New Issue
Block a user