1
0
mirror of synced 2025-12-19 09:57:42 -05:00

remove translation health reports (#48800)

This commit is contained in:
Rachael Sewell
2024-01-22 10:52:41 -08:00
committed by GitHub
parent b428d248fa
commit 65a3eb9811
4 changed files with 0 additions and 308 deletions

View File

@@ -1,137 +0,0 @@
name: Translation health report

# **What it does**: Provides errors and summary statistics on rendering translated content.
# **Why we have it**: To improve our translations by having clearer visibility.
# **Who does it impact**: Docs engineering, Microsoft translators.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST

# NOTE(review): no step visible in this workflow writes to the repository;
# confirm whether the local composite actions need `contents: write` before
# tightening this to `contents: read`.
permissions:
  contents: write

jobs:
  create-translation-health-report:
    name: Create translation health report
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-20.04-xl
    # This sets a maximum execution time of 300 minutes (5 hours)
    # to prevent the workflow from running longer than necessary.
    timeout-minutes: 300
    strategy:
      # One language failing must not cancel the reports for the others.
      fail-fast: false
      matrix:
        include:
          - language: zh
            language_dir: translations/zh-cn
            language_repo: github/docs-internal.zh-cn
          - language: es
            language_dir: translations/es-es
            language_repo: github/docs-internal.es-es
          - language: pt
            language_dir: translations/pt-br
            language_repo: github/docs-internal.pt-br
          - language: ru
            language_dir: translations/ru-ru
            language_repo: github/docs-internal.ru-ru
          - language: ja
            language_dir: translations/ja-jp
            language_repo: github/docs-internal.ja-jp
          - language: fr
            language_dir: translations/fr-fr
            language_repo: github/docs-internal.fr-fr
          - language: de
            language_dir: translations/de-de
            language_repo: github/docs-internal.de-de
          - language: ko
            language_dir: translations/ko-kr
            language_repo: github/docs-internal.ko-kr
    steps:
      - name: Checkout the docs-internal repo
        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0

      - name: Clone docs-internal.popular-pages
        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
        with:
          repository: github/docs-internal.popular-pages
          # This works because user `docs-bot` has read access to that private repo.
          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
          path: popular-pages

      - name: Checkout the language-specific repo
        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
        with:
          repository: ${{ matrix.language_repo }}
          token: ${{ secrets.DOCS_BOT_PAT_WORKFLOW_READORG }}
          path: ${{ matrix.language_dir }}

      # Exports the short SHA of the translation checkout as `env.gitref`
      # so the report step can pass it to the script.
      - name: Get language SHA
        run: |
          gitref=$(cd "${{ matrix.language_dir }}" && git rev-parse --short HEAD)
          echo "gitref=$gitref" >> "$GITHUB_ENV"

      - uses: ./.github/actions/node-npm-setup

      # The script's stdout is captured into the report file that the
      # following steps print and upload.
      - name: Create translation health report
        env:
          POPULAR_PAGES_JSON: popular-pages/records/popular-pages.json
        run: |
          npm run create-translation-health-report -- \
            --language ${{ matrix.language }} \
            --gitref ${{ env.gitref }} \
            >> "$GITHUB_WORKSPACE/translation-health-report.json"

      - name: View report in workflow
        run: cat "$GITHUB_WORKSPACE/translation-health-report.json"

      - name: Log in to Azure
        uses: azure/login@92a5484dfaf04ca78a94597f4f19fea633851fa2 # pin @v1.4.6
        with:
          creds: ${{ secrets.PROD_AZURE_CREDENTIALS }}

      # https://learn.microsoft.com/en-us/cli/azure/storage/blob?view=azure-cli-latest#az-storage-blob-upload
      # https://github.com/marketplace/actions/azure-cli-action
      - name: Upload latest to Azure blob storage
        uses: azure/CLI@b0e31ae20280d899279f14c36e877b4c6916e2d3 # pin @v1.0.8
        with:
          inlineScript: |
            az storage blob upload \
              --name "${{ matrix.language }}-latest.json" \
              --file $GITHUB_WORKSPACE/translation-health-report.json \
              --account-name githubdocs \
              --account-key ${{ secrets.AZURE_GITHUBDOCS_STORAGE_KEY }} \
              --container-name translation-health-reports \
              --overwrite true

      - name: Upload date formatted to Azure blob storage
        uses: azure/CLI@b0e31ae20280d899279f14c36e877b4c6916e2d3 # pin @v1.0.8
        with:
          inlineScript: |
            # Write a date formatted for historical reference
            az storage blob upload \
              --name "${{ matrix.language }}-$(date +%Y-%m-%d).json" \
              --file $GITHUB_WORKSPACE/translation-health-report.json \
              --account-name githubdocs \
              --account-key ${{ secrets.AZURE_GITHUBDOCS_STORAGE_KEY }} \
              --container-name translation-health-reports \
              --overwrite true

      # Runs even when an earlier step failed.
      - name: Log out from Azure
        if: always()
        run: |
          az logout

      # Alert only for failed runs that were not started manually.
      - uses: ./.github/actions/slack-alert
        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
        with:
          slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}

View File

@@ -19,31 +19,3 @@ We do not accept translation changes from open source contributors.
- [ ] Do not translate anything inside of Liquid tags, such as `{% data %}` or `{% ifversion ... %}`, `{% note %}` or `{{ someVariable }}`.
- [ ] Be sure to translate the frontmatter properties `title`, `shortTitle`, `intro`, and `permissions`, but leave all other keys in each content `.md` file untranslated
- [ ] For every `{% ifversion ... %}` there's a `{% endif %}` following it
## Error diagnosis
We provide error reports that we upload daily.
<details>
<summary>Report locations</summary>
Reports are available both as "latest" and as dated files in 'yyyy-mm-dd' format for historical reference.
- https://githubdocs.blob.core.windows.net/translation-health-reports/es-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/es-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ja-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ja-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/pt-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/pt-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/zh-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/zh-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ru-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ru-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ko-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/ko-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/fr-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/fr-2023-01-05.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/de-latest.json
- https://githubdocs.blob.core.windows.net/translation-health-reports/de-2023-01-05.json
</details>

View File

@@ -22,7 +22,6 @@
"check-content-type": "node src/workflows/check-content-type.js",
"check-github-github-links": "node src/links/scripts/check-github-github-links.js",
"copy-fixture-data": "node src/tests/scripts/copy-fixture-data.js",
"create-translation-health-report": "node src/languages/scripts/create-translation-health-report.js",
"debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon --inspect src/frame/server.js",
"delete-orphan-translation-files": "tsx src/workflows/delete-orphan-translation-files.ts",
"dev": "cross-env npm start",

View File

@@ -1,142 +0,0 @@
#!/usr/bin/env node
// [start-readme]
//
// Create a list of errors and summary statistics for errors in a particular language.
//
// [end-readme]
/* Nota bene:
If you are suddenly getting more errors, try running this:
$ npm run create-translation-health-report -- -l en -r 000
If there are any errors before getting the JSON output,
const context = { ... } probably needs more data.
*/
import { program } from 'commander'
import fs from 'fs/promises'
// Declare the command-line interface and parse argv immediately: the
// environment variables set below are derived from the parsed options.
program
  .description('Create a translation health report for one language.')
  .requiredOption('-l, --language <language>', 'The language to health check')
  .option('-r, --gitref <sha>', 'Language repo latest git commit short SHA')
  .parse(process.argv)

const { language: requestedLanguage } = program.opts()

// Both variables are set before the project modules are loaded further down.
Object.assign(process.env, {
  // Throw errors instead of falling back to English.
  // (process.env stores strings, so this is the same value `true` coerces to.)
  DEBUG_TRANSLATION_FALLBACKS: 'true',
  // Enable only English plus the language under test.
  ENABLED_LANGUAGES: `en,${requestedLanguage}`,
})
// Popularity score per page path, used to rank issues in the report.
// It must be declared BEFORE the console override below: the override's
// handler reads `scores`, and the handler can run while the dynamic imports
// further down are still being awaited. With the declaration placed after
// those imports, such a call would hit the temporal dead zone and throw a
// ReferenceError instead of recording the issue.
// The map is populated later in this section, once the language is validated.
const scores = {}

// In debug mode, it will call console.warn ... so overriding :)
// Want to make sure the result is valid JSON
const prevConsoleWarn = console.warn
const prevConsoleError = console.error

// Every issue collected for the report, from intercepted console output
// and from the rendering done later in the script.
let issues = []

// Turn anything sent to console.warn/console.error into a report issue
// instead of printing it.
console.warn = console.error = (...args) => {
  const [first] = args
  if (args.length > 1) {
    // Several arguments: flatten them into one message.
    issues.push({ message: args.map(String).join(' '), score: 0 })
  } else if (typeof first === 'string') {
    issues.push({ message: first, score: 0 })
  } else if (first?.constructor === Object) {
    // A plain object carrying `path` and `message`; strip the markdown
    // suffix from the path before looking up its score.
    const path = first.path?.replace('/index.md', '').replace('.md', '')
    issues.push({ path, message: first.message, score: scores[path] || 0 })
  } else if (Array.isArray(first) && first[0]?.constructor === Object && first[0].filepath) {
    // This is a YML parsing error. It's serious enough to bump the score.
    const [parseError] = first
    let message = parseError.message
    if (parseError.reason) {
      message += ` (reason: ${parseError.reason})`
    }
    const path = parseError.filepath
    // By giving it a +100 on the score, it at least stands above all the
    // other issues which are mostly score 0. It's artificial but it works.
    issues.push({ path, message, score: (scores[path] || 0) + 100 })
  } else {
    // Don't use .warn() because this logging here is for the engineer
    // working on this script.
    console.log("WARNING: Don't know how to turn these args into an issue", args)
  }
}

// Weird import syntax, but forces it to load after process.env... changes
const { languageKeys } = await import('#src/languages/lib/languages.js')
const { loadPages, loadPageMap } = await import('#src/frame/lib/page-data.js')
const { precompileRedirects } = await import('#src/redirects/lib/precompile.js')
const { allVersions, allVersionKeys } = await import('#src/versions/lib/all-versions.js')
const { getProductStringFromPath } = await import('#src/frame/lib/path-utils.js')

// Check that the language is valid
const { language, gitref } = program.opts()
if (!languageKeys.includes(language)) {
  throw new Error(`Language ${language} is not in ${languageKeys.join()}.`)
}

// Gather popularity data the search uses to prioritize errors.
// Optional: without POPULAR_PAGES_JSON every score lookup falls back to 0.
const { POPULAR_PAGES_JSON } = process.env
if (POPULAR_PAGES_JSON) {
  const popularPagesRaw = await fs.readFile(POPULAR_PAGES_JSON, 'utf8')
  for (const row of JSON.parse(popularPagesRaw)) {
    scores[row.path_article] = row.path_count
  }
}
// Load every page, then keep only the ones in the requested language.
const allPages = await loadPages()
const pages = allPages.filter(({ languageCode }) => languageCode === language)
const pageMap = await loadPageMap(pages)
const redirects = await precompileRedirects(pages)

// Render each page once per applicable version, one at a time, and record
// which error messages show up under which versions.
for (const page of pages) {
  const plainPath = page.relativePath.replace('/index.md', '').replace('.md', '')

  // Error message -> list of versions in which rendering threw that message.
  const pageIssues = {}

  const applicableVersions = allVersionKeys.filter((key) => page.applicableVersions.includes(key))
  for (const version of applicableVersions) {
    const currentPath = `/${language}/${version}/${plainPath}`

    // Reference middleware/context.js for shape
    const context = {
      // needed for all pages
      currentVersion: version,
      currentLanguage: language,
      currentPath,
      // needed for ifversion tag
      currentVersionObj: allVersions[version],
      // needed for learning-track on guides pages
      currentProduct: getProductStringFromPath(currentPath),
      pages: pageMap,
      redirects,
    }

    try {
      await page.render(context)
    } catch (err) {
      // Which messages apply to which versions
      const { message } = err
      if (!pageIssues[message]) {
        pageIssues[message] = []
      }
      pageIssues[message].push(version)
    }
  }

  // Pages that rendered cleanly in every version add nothing to the report.
  if (Object.keys(pageIssues).length === 0) continue

  issues.push({
    path: plainPath,
    messages: pageIssues,
    score: scores[plainPath] || 0,
  })
}
// Ordering for the report: higher score first so the translators know what
// to focus on first; on equal scores, the issue whose JSON serialization is
// longer (i.e. carries more information) comes first.
const byScoreThenDetail = (a, b) => {
  const scoreDelta = b.score - a.score
  if (scoreDelta) return scoreDelta
  return JSON.stringify(b).length - JSON.stringify(a).length
}

// Issues whose path mentions early-access are left out of the report.
const isReportable = (issue) => !issue.path?.includes('early-access')

issues = issues.filter(isReportable).sort(byScoreThenDetail)

// Begin an output report
const report = {
  language,
  gitref,
  datetime: new Date().toJSON(),
  issuesCount: issues.length,
  issues,
}

// Put the real console methods back before printing.
console.warn = prevConsoleWarn
console.error = prevConsoleError

// The report is pretty-printed JSON on stdout.
console.log(JSON.stringify(report, null, 2))