1
0
mirror of synced 2026-01-06 06:02:35 -05:00

Merge pull request #22063 from github/repo-sync

repo sync
This commit is contained in:
Octomerger Bot
2022-11-16 11:32:50 -08:00
committed by GitHub
3 changed files with 468 additions and 0 deletions

View File

@@ -0,0 +1,132 @@
name: Translation health report

# **What it does**: Provides errors and summary statistics on rendering translated content.
# **Why we have it**: To improve our translations by having clearer visibility.
# **Who does it impact**: Docs engineering, Microsoft translators.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *'  # Run every day at 16:20 UTC / 8:20 PST

permissions:
  contents: read

jobs:
  create-translation-health-report:
    name: Create translation health report
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    # This sets a maximum execution time of 300 minutes (5 hours)
    # to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing must not cancel the reports for the others.
      fail-fast: false
      # Languages are processed one at a time.
      max-parallel: 1
      matrix:
        include:
          - language: es
            language_dir: translations/es-ES
            language_repo: github/docs-internal.es-es

          - language: ja
            language_dir: translations/ja-JP
            language_repo: github/docs-internal.ja-jp

          - language: pt
            language_dir: translations/pt-BR
            language_repo: github/docs-internal.pt-br

          - language: cn
            language_dir: translations/zh-CN
            language_repo: github/docs-internal.zh-cn

          # We'll be ready to add the following languages in a future effort.

          # - language: ru
          #   language_dir: translations/ru-RU
          #   language_repo: github/docs-internal.ru-ru

          # - language: ko
          #   language_dir: translations/ko-KR
          #   language_repo: github/docs-internal.ko-kr

          # - language: fr
          #   language_dir: translations/fr-FR
          #   language_repo: github/docs-internal.fr-fr

          # - language: de
          #   language_dir: translations/de-DE
          #   language_repo: github/docs-internal.de-de

    steps:
      - name: Checkout the docs-internal repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748

      # The checked-in translations are dropped so that the language-specific
      # repo can be checked out into the same directory in the next step.
      - name: Remove all language translations
        run: |
          git rm -rf --quiet ${{ matrix.language_dir }}/content
          git rm -rf --quiet ${{ matrix.language_dir }}/data

      - name: Checkout the language-specific repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: ${{ matrix.language_dir }}

      # Exposes the language repo's short commit SHA as `env.gitref` to later steps.
      - name: Get language SHA
        run: |
          gitref=$(cd ${{ matrix.language_dir }} && git rev-parse --short HEAD)
          echo "gitref=$gitref" >> "$GITHUB_ENV"

      - name: 'Setup node'
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'

      - name: npm ci
        run: npm ci

      # `jq -Rsa .` encodes the whole report as one single-line JSON string so
      # it can be stored as a `name=value` line in the environment file.
      - name: Create translation health report
        # Naming the shell explicitly makes Actions run bash with `-o pipefail`.
        # Without it the pipeline's status is jq's alone, so a crashing report
        # script would be masked and an empty report uploaded.
        shell: bash
        run: |
          translation_health_report=$( \
            node script/i18n/create-translation-health-report.js \
              --language ${{ matrix.language }} \
              --gitref ${{ env.gitref }} \
            | jq -Rsa .
          )
          echo "translation_health_report=$translation_health_report" >> "$GITHUB_ENV"

      - name: Log in to Azure
        uses: azure/login@1f63701bf3e6892515f1b7ce2d2bf1708b46beaf
        with:
          creds: ${{ secrets.PROD_AZURE_CREDENTIALS }}

      # The report is uploaded twice: once under a stable "latest" name and
      # once under a name carrying today's date.
      # The variable is quoted because the report contains spaces; unquoted,
      # the shell would split it into many separate arguments.
      - name: Upload to Azure blob storage
        uses: azure/CLI@61bb69d64d613b52663984bf12d6bac8fd7b3cc8
        with:
          inlineScript: |
            az storage blob upload \
              --name "${{ matrix.language }}-latest.json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports
            az storage blob upload \
              --name "${{ matrix.language }}-$(date +%Y-%m-%d).json" \
              --data "$translation_health_report" \
              --container-name translation-health-reports

      # Runs even when an earlier step failed.
      - name: Log out from Azure
        if: always()
        run: |
          az logout

      # Emit a notification for the first responder to triage if the workflow failed.
      - name: Send Slack notification if workflow failed
        uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
        if: failure()
        with:
          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
          color: failure
          text: 'The health report for ${{ matrix.language }} failed.'

View File

@@ -0,0 +1,173 @@
name: WIP Languages Create translation Batch Pull Request

# **What it does**:
#  - Creates one pull request per WIP language after running a series of automated checks,
#    removing translations that are broken in any known way
# **Why we have it**:
#  - To test the translation pipeline for WIP languages
# **Who does it impact**: Helps test how WIP languages will behave in CI

on:
  workflow_dispatch:

permissions:
  contents: write

jobs:
  create-translation-batch-for-wip-langs:
    name: Create translation batch for WIP languages
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-latest
    # A sync's average run time is ~3.2 hours.
    # This sets a maximum execution time of 300 minutes (5 hours) to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing must not cancel the batches for the others.
      fail-fast: false
      # Languages are processed one at a time.
      max-parallel: 1
      matrix:
        include:
          - language: ru
            language_dir: translations/ru-RU
            language_repo: github/docs-internal.ru-ru

          - language: ko
            language_dir: translations/ko-KR
            language_repo: github/docs-internal.ko-kr

          - language: fr
            language_dir: translations/fr-FR
            language_repo: github/docs-internal.fr-fr

          - language: de
            language_dir: translations/de-DE
            language_repo: github/docs-internal.de-de

    steps:
      # The branch name is published as the step output `BRANCH_NAME`.
      # It is written to the $GITHUB_OUTPUT file because the `::set-output`
      # workflow command is deprecated.
      - name: Set branch name
        id: set-branch
        run: |
          echo "BRANCH_NAME=msft-translation-batch-${{ matrix.language }}-$(date +%Y-%m-%d__%H-%M)" >> "$GITHUB_OUTPUT"

      - run: git config --global user.name "docubot"
      - run: git config --global user.email "67483024+docubot@users.noreply.github.com"

      - name: Checkout the docs-internal repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          fetch-depth: 0
          lfs: true

      - name: Create a branch for the current language
        run: git checkout -b ${{ steps.set-branch.outputs.BRANCH_NAME }}

      - name: Remove unwanted git hooks
        run: rm .git/hooks/post-checkout

      # The checked-in translations are dropped so that the language-specific
      # repo can be checked out into the same directory in the next step.
      - name: Remove all language translations
        run: |
          git rm -rf --quiet ${{ matrix.language_dir }}/content
          git rm -rf --quiet ${{ matrix.language_dir }}/data

      - name: Checkout the language-specific repo
        uses: actions/checkout@dcd71f646680f2efd8db4afa5ad64fdcba30e748
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCUBOT_READORG_REPO_WORKFLOW_SCOPES }}
          path: ${{ matrix.language_dir }}

      - name: Remove .git from the language-specific repo
        run: rm -rf ${{ matrix.language_dir }}/.git

      # Each of the following commit commands falls back to an echo so that
      # "nothing to commit" does not fail the step.
      - name: Commit translated files
        run: |
          git add ${{ matrix.language_dir }}
          git commit -m "Add translations" || echo "Nothing to commit"

      - name: 'Setup node'
        uses: actions/setup-node@17f8bd926464a1afa4c6a11669539e9c1ba77048
        with:
          node-version: '16.17.0'

      - run: npm ci

      - name: Homogenize frontmatter
        run: |
          node script/i18n/homogenize-frontmatter.js
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/homogenize-frontmatter.js" || echo "Nothing to commit"

      - name: Fix translation errors
        run: |
          node script/i18n/fix-translation-errors.js
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/fix-translation-errors.js" || echo "Nothing to commit"

      # Output is appended to /tmp/batch.log, which the report steps below read.
      - name: Check rendering
        run: |
          node script/i18n/lint-translation-files.js --check rendering | tee -a /tmp/batch.log | cat
          git add ${{ matrix.language_dir }} && git commit -m "Run script/i18n/lint-translation-files.js --check rendering" || echo "Nothing to commit"

      - name: Reset files with broken liquid tags
        run: |
          node script/i18n/msft-reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }} | tee -a /tmp/batch.log | cat
          git add ${{ matrix.language_dir }} && git commit -m "run script/i18n/msft-reset-files-with-broken-liquid-tags.js --language=${{ matrix.language }}" || echo "Nothing to commit"

      - name: Check in CSV report
        run: |
          mkdir -p translations/log
          csvFile=translations/log/msft-${{ matrix.language }}-resets.csv
          script/i18n/msft-report-reset-files.js --report-type=csv --language=${{ matrix.language }} --log-file=/tmp/batch.log > $csvFile
          git add -f $csvFile && git commit -m "Check in ${{ matrix.language }} CSV report" || echo "Nothing to commit"

      - name: Write the reported files that were reset to /tmp/pr-body.txt
        run: script/i18n/msft-report-reset-files.js --report-type=pull-request-body --language=${{ matrix.language }} --log-file=/tmp/batch.log --csv-path=${{ steps.set-branch.outputs.BRANCH_NAME }}/translations/log/msft-${{ matrix.language }}-resets.csv > /tmp/pr-body.txt

      - name: Push filtered translations
        run: git push origin ${{ steps.set-branch.outputs.BRANCH_NAME }}

      - name: Close existing stale batches
        uses: lee-dohm/close-matching-issues@e9e43aad2fa6f06a058cedfd8fb975fd93b56d8f
        with:
          token: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
          query: 'type:pr label:translation-batch-${{ matrix.language }}'

      - name: Create translation batch pull request
        env:
          GITHUB_TOKEN: ${{ secrets.DOCUBOT_REPO_PAT }}
          TITLE: '[DO NOT MERGE - WIP Language test]: New translation batch for ${{ matrix.language }}'
          BASE: 'main'
          HEAD: ${{ steps.set-branch.outputs.BRANCH_NAME }}
          LANGUAGE: ${{ matrix.language }}
          BODY_FILE: '/tmp/pr-body.txt'
        run: .github/actions-scripts/msft-create-translation-batch-pr.js
# - name: Approve PR
# if: github.ref_name == 'main'
# env:
# GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
# run: gh pr review --approve || echo "Nothing to approve"
# - name: Set auto-merge
# if: github.ref_name == 'main'
# env:
# GITHUB_TOKEN: ${{ secrets.OCTOMERGER_PAT_WITH_REPO_AND_WORKFLOW_SCOPE }}
# run: gh pr merge ${{ steps.set-branch.outputs.BRANCH_NAME }} --auto --squash || echo "Nothing to merge"
# When the maximum execution time is reached for this job, Actions cancels the workflow run.
# This emits a notification for the first responder to triage.
# - name: Send Slack notification if workflow is cancelled
# uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
# if: cancelled()
# with:
# channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
# bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
# color: failure
# text: 'The new translation batch for ${{ matrix.language }} was cancelled.'
# Emit a notification for the first responder to triage if the workflow failed.
# - name: Send Slack notification if workflow failed
# uses: someimportantcompany/github-actions-slack-message@f8d28715e7b8a4717047d23f48c39827cacad340
# if: failure()
# with:
# channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
# bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
# color: failure
# text: 'The new translation batch for ${{ matrix.language }} failed.'

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env node
// [start-readme]
//
// Create a list of errors and summary statistics for errors in a particular language.
//
// [end-readme]
/* Nota bene:
If you are getting more errors all of a sudden, try running this:
$ script/i18n/create-translation-health-report.js -l en -r 000
If there are any errors, const context = { ... } probably needs more data.
*/
import { program } from 'commander'
import fs from 'fs/promises'
import { pick } from 'lodash-es'
import { loadPages, loadPageMap } from '../../lib/page-data.js'
import loadSiteData from '../../lib/site-data.js'
import loadRedirects from '../../lib/redirects/precompile.js'
import { allVersions, allVersionKeys } from '../../lib/all-versions.js'
import { languageKeys } from '../../lib/languages.js'
import { getProductStringFromPath } from '../../lib/path-utils.js'
// Command-line interface: both --language and --gitref are required options.
program.description('Create a translation health report for one language.')
program.requiredOption('-l, --language <language>', 'The language to health check')
program.requiredOption('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
program.parse(process.argv)
// Gather the popularity data the search uses, so errors can be prioritized.
//
// Reads lib/search/popular-pages.json line by line, parsing each line as its
// own JSON value. Resolves to an object that maps every row's `path_article`
// to its `path_count`.
async function fetchPopularityData() {
  const fileText = await fs.readFile('lib/search/popular-pages.json', 'utf8')
  return fileText.split('\n').reduce((counts, line) => {
    try {
      const entry = JSON.parse(line)
      counts[entry.path_article] = entry.path_count
    } catch {
      // Lines that cannot be parsed (e.g. blank lines) are skipped on purpose.
    }
    return counts
  }, {})
}
// Render `page` once for each version it applies to and gather what went wrong.
//
// Resolves to an object keyed by version, whose values are arrays of trimmed
// error objects, or to undefined when every version rendered without errors.
async function collectPageErrors(page, { language, data, redirects, plainPath, pageMap }) {
  // Render one version. Resolves to a [version, errors] pair, or to undefined
  // when that version produced no errors.
  const renderVersion = async (version) => {
    const errors = []
    try {
      const currentPath = `/${language}/${version}/${plainPath}`
      // Reference middleware/context.js for data shape
      const context = {
        ...data, // needed for all pages
        currentVersion: version, // needed for all pages
        currentLanguage: language, // needed for all pages
        currentPath, // needed for all pages
        currentVersionObj: allVersions[version], // needed for ifversion tag
        currentProduct: getProductStringFromPath(currentPath), // needed for learning-track on guides pages
        pages: pageMap, // needed for learning-track on guides pages
        redirects, // needed for learning-track on guides pages
      }
      await page.render(context, errors)
    } catch (err) {
      // A thrown error is reported alongside the ones the renderer collected.
      errors.push(err)
    }
    if (!errors.length) {
      return undefined
    }
    // Filter down properties to make it easier for
    // translators to get the clearest information on the error.
    // Other fields: Object.getOwnPropertyNames(err)
    const trimmed = errors.map((err) => pick(err, ['name', 'message', 'token.content']))
    return [version, trimmed]
  }

  // Go through each version the page applies to; all renders are started together.
  const applicableVersions = allVersionKeys.filter((version) =>
    page.applicableVersions.includes(version)
  )
  const outcomes = await Promise.all(applicableVersions.map(renderVersion))
  const entries = outcomes.filter(Boolean)
  return entries.length ? Object.fromEntries(entries) : undefined
}
// Count how often each error message occurs across all pages and versions.
//
// `errors` is an array of page entries, each with a `versions` object whose
// values hold that version's errors. Returns an object mapping each message
// to its number of occurrences.
function groupErrors(errors) {
  const messages = errors
    .flatMap((page) => Object.values(page.versions).flat())
    .map((error) => error.message)

  const tally = {}
  for (const message of messages) {
    tally[message] = tally[message] || 0
    tally[message]++
  }
  return tally
}
// Build the translation health report for the language given on the command line.
//
// Resolves to a plain object holding the language, the language repo gitref,
// a timestamp, page totals, per-page errors sorted by popularity, error counts
// grouped by message, and trimmed site-data errors.
// Throws an Error when the requested language is not one of `languageKeys`.
async function createReport() {
  // Check that the language is valid
  const { language, gitref } = program.opts()
  const isKnownLanguage = languageKeys.includes(language)
  if (!isKnownLanguage) {
    throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
  }

  // Load popularity data to sort errors
  const popularity = await fetchPopularityData()

  // Load all pages
  const allPages = await loadPages()
  const dataErrors = []
  const data = loadSiteData(dataErrors)[language]

  // Keep only this language's pages. Early access pages log to the console,
  // which would show in the report, so they are left out as well.
  const pages = allPages.filter(
    (page) => page.languageCode === language && !page.relativePath.includes('early-access')
  )
  const pageMap = await loadPageMap(pages)
  const redirects = await loadRedirects(pages)

  // Try to render one page. Resolves to a report entry, or to undefined when
  // the page rendered without errors in every version.
  const checkPage = async (page) => {
    const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')
    const versions = await collectPageErrors(page, {
      language,
      data,
      redirects,
      plainPath,
      pageMap,
    })
    if (!versions) {
      return undefined
    }
    return {
      path: plainPath,
      popularity: popularity[plainPath] || 0,
      versions,
    }
  }

  // Try to render each page
  const outcomes = await Promise.all(pages.map(checkPage))
  const pageErrors = outcomes.filter(Boolean)
  // Sort by popularity desc so the translators know what to focus on first
  pageErrors.sort((a, b) => b.popularity - a.popularity)

  // Filter down properties to make it easier for
  // translators to get the clearest information on the error
  const trimError = (err) => pick(err, ['name', 'message', 'token.content'])

  // Assemble the output report
  return {
    language,
    gitref,
    datetime: new Date().toJSON(),
    totalPages: pages.length,
    // totalErrorPages should be around en: 0, es: 1043, ja: 1004, pt: 995, cn: 1063
    totalErrorPages: pageErrors.length,
    pageErrors,
    // To group errors by message instead
    groupedPageErrors: groupErrors(pageErrors),
    dataErrors: dataErrors.map(trimError),
  }
}
// Build the report and print it to stdout as JSON indented by two spaces.
const healthReport = await createReport()
console.log(JSON.stringify(healthReport, null, 2))