diff --git a/.github/workflows/delete-orphan-translation-files.yml b/.github/workflows/delete-orphan-translation-files.yml
new file mode 100644
index 0000000000..2ce1de12cf
--- /dev/null
+++ b/.github/workflows/delete-orphan-translation-files.yml
@@ -0,0 +1,124 @@
+name: Delete orphan translation files
+
+# **What it does**:
+#   Compares content & data files left in each translation that aren't
+#   in docs-internal. Then creates a PR to delete these files.
+# **Why we have it**:
+#   When Juno dumps to each translation repo it cannot account for the
+#   fact that files in docs-internal get moved or deleted. So the
+#   sum total of files constantly grows.
+#   This leads to excess files in each translation repo that are not
+#   ever used but have to be put into every production build.
+# **Who does it impact**: Docs engineering
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '20 16 * * 1' # Run every Monday at 16:20 UTC / 8:20 PST
+
+permissions:
+  contents: write
+
+jobs:
+  delete-orphan-translation-files:
+    if: github.repository == 'github/docs-internal'
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - language: zh
+            language_dir: translations/zh-cn
+            language_repo: github/docs-internal.zh-cn
+
+          - language: es
+            language_dir: translations/es-es
+            language_repo: github/docs-internal.es-es
+
+          - language: pt
+            language_dir: translations/pt-br
+            language_repo: github/docs-internal.pt-br
+
+          - language: ru
+            language_dir: translations/ru-ru
+            language_repo: github/docs-internal.ru-ru
+
+          - language: ja
+            language_dir: translations/ja-jp
+            language_repo: github/docs-internal.ja-jp
+
+          - language: fr
+            language_dir: translations/fr-fr
+            language_repo: github/docs-internal.fr-fr
+
+          - language: de
+            language_dir: translations/de-de
+            language_repo: github/docs-internal.de-de
+
+          - language: ko
+            language_dir: translations/ko-kr
+            language_repo: github/docs-internal.ko-kr
+
+    steps:
+      - uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
+
+      - name: Checkout the language-specific repo
+        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
+        with:
+          repository: ${{ matrix.language_repo }}
+          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
+          path: ${{ matrix.language_dir }}
+
+      - uses: ./.github/actions/node-npm-setup
+
+      - name: Delete orphan files
+        run: |
+          npm run delete-orphan-translation-files -- ${{ matrix.language_dir }}
+
+      - name: Debug deleted files
+        working-directory: ${{ matrix.language_dir }}
+        run: git status
+
+      - name: Git config
+        working-directory: ${{ matrix.language_dir }}
+        run: |
+          git config --global user.name "docs-bot"
+          git config --global user.email "77750099+docs-bot@users.noreply.github.com"
+
+      - name: Git commit and push, create and merge PR
+        working-directory: ${{ matrix.language_dir }}
+        env:
+          # Needed for gh
+          GH_TOKEN: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
+        run: |
+          git status
+          # The script found nothing to delete, so there is nothing to commit
+          # and no PR to open. Exit successfully rather than letting
+          # `git commit` fail and trigger the Slack alert.
+          if git diff --quiet HEAD; then
+            echo "No orphan files were deleted. Nothing to commit."
+            exit 0
+          fi
+
+          current_timestamp=$(date '+%Y-%m-%d-%H%M%S')
+          branch_name="delete-orphan-files-$current_timestamp"
+          git checkout -b "$branch_name"
+          current_daystamp=$(date '+%Y-%m-%d')
+          git commit -a -m "Delete orphan files ($current_daystamp)"
+          git push origin "$branch_name"
+
+          # Create PR
+          echo "Creating pull request..."
+          gh pr create \
+            --title "Delete orphan files ($current_daystamp)" \
+            --body '👋 humans. This PR was generated from docs-internal/.github/workflows/delete-orphan-translation-files.yml.
+            ' \
+            --repo "${{ matrix.language_repo }}"
+          echo "Merge created PR..."
+          gh pr merge --merge --auto --delete-branch "$branch_name"
+
+      - uses: ./.github/actions/slack-alert
+        if: ${{ failure() && github.event_name != 'workflow_dispatch' }}
+        with:
+          slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
+          slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
diff --git a/package.json b/package.json
index a3f06cec11..df86c7f317 100644
--- a/package.json
+++ b/package.json
@@ -23,6 +23,7 @@
     "copy-fixture-data": "node src/tests/scripts/copy-fixture-data.js",
     "create-translation-health-report": "node src/languages/scripts/create-translation-health-report.js",
     "debug": "cross-env NODE_ENV=development ENABLED_LANGUAGES=en nodemon --inspect src/frame/server.js",
+    "delete-orphan-translation-files": "tsx src/workflows/delete-orphan-translation-files.ts",
     "dev": "cross-env npm start",
     "find-orphaned-assets": "node src/assets/scripts/find-orphaned-assets.js",
     "fixture-dev": "cross-env ROOT=src/fixtures/fixtures npm start",
diff --git a/src/workflows/delete-orphan-translation-files.ts b/src/workflows/delete-orphan-translation-files.ts
new file mode 100644
index 0000000000..7b52075c33
--- /dev/null
+++ b/src/workflows/delete-orphan-translation-files.ts
@@ -0,0 +1,106 @@
+#!/usr/bin/env node
+
+/**
+ * This script will delete files from a translation repo of files that
+ * only exist there and not "here". Here being the docs repo.
+ * It will only look at *.md files in `content/` and
+ * only look at *.md and *.yml files in `data/`.
+ *
+ * If executed with `--dry-run` it will only print what it would delete.
+ *
+ * To avoid deleting too many files at once, which can make PRs too big,
+ * there's a `--max <number>` option which defaults to 100.
+ *
+ * To run this locally, check out a translation repo and then run it like this:
+ *
+ *    git clone git@github.com:github/docs-internal.ja-jp.git /tmp/docs-internal.ja-jp
+ *    npm run delete-orphan-translation-files -- /tmp/docs-internal.ja-jp
+ *
+ * Note that it doesn't execute `git rm ...` for you. Just regular
+ * file deletion. It's up to you now to commit and push.
+ */
+
+import fs from 'fs'
+import path from 'path'
+
+import { program } from 'commander'
+import walkFiles from 'src/workflows/walk-files.js'
+import { ROOT } from 'src/frame/lib/constants.js'
+
+program
+  .description('Delete orphan translation files')
+  .option('--dry-run', 'Just print what it would delete')
+  // `<number>` marks --max as taking a value; commander treats an option
+  // without a placeholder as a boolean flag.
+  .option('--max <number>', 'Max. number of files to delete', '100')
+  // `<repo-root>` is a required positional argument.
+  .argument('<repo-root>', 'path to repo root')
+  .parse(process.argv)
+
+const opts = program.opts()
+
+type Options = {
+  dryRun: boolean
+  max: number
+}
+
+// Reject a non-numeric or negative --max up front; a NaN limit would make
+// every `deleted < options.max` check false and silently delete nothing.
+const max = parseInt(opts.max, 10)
+if (Number.isNaN(max) || max < 0) {
+  throw new Error(`--max must be a non-negative integer, got '${opts.max}'`)
+}
+
+main(program.args[0], {
+  dryRun: Boolean(opts.dryRun),
+  max,
+})
+
+/**
+ * Walk the translation checkout at `root` and remove every content/data
+ * file that has no counterpart at the same relative path under `ROOT`.
+ *
+ * At most `options.max` files are removed per run; with `options.dryRun`
+ * the files are only printed. A summary is logged at the end.
+ */
+function main(root: string, options: Options) {
+  let deleted = 0
+  let countInSync = 0
+  let countOrphan = 0
+  for (const filePath of getContentAndDataFiles(root)) {
+    const relPath = path.relative(root, filePath)
+    if (!fs.existsSync(path.join(ROOT, relPath))) {
+      countOrphan++
+      if (deleted < options.max) {
+        if (options.dryRun) {
+          console.log('DELETE', filePath)
+        } else {
+          fs.rmSync(filePath)
+          console.log('DELETED', filePath)
+        }
+        deleted++
+
+        if (deleted >= options.max) {
+          console.log(`Max. number (${options.max}) of files deleted`)
+        }
+      }
+    } else {
+      countInSync++
+    }
+  }
+  console.log(`In conclusion, deleted ${deleted.toLocaleString()} files.`)
+  console.log(
+    `There are ${countInSync.toLocaleString()} files in sync and ${countOrphan.toLocaleString()} orphan files in ${root}`,
+  )
+}
+
+/**
+ * List the files this script considers: `*.md` under `content/` and
+ * `*.md` / `*.yml` under `data/`, both relative to `root`.
+ */
+function getContentAndDataFiles(root: string) {
+  return [
+    ...walkFiles(path.join(root, 'content'), ['.md']),
+    ...walkFiles(path.join(root, 'data'), ['.md', '.yml']),
+  ]
+}