From 94b56e8c6bbcc81931514ee594bf88a46f7f6da9 Mon Sep 17 00:00:00 2001 From: Sarah Schneider Date: Wed, 23 Jul 2025 09:30:18 -0400 Subject: [PATCH] Script to update filepaths to match short titles (#56749) Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- package.json | 1 + src/content-render/scripts/move-content.js | 22 +- .../reconcile-category-dirs-with-ids.js | 110 ------- .../scripts/reconcile-filenames-with-ids.js | 80 ------ .../scripts/update-filepaths.ts | 269 ++++++++++++++++++ src/frame/tests/pages.js | 2 +- 6 files changed, 292 insertions(+), 192 deletions(-) delete mode 100755 src/content-render/scripts/reconcile-category-dirs-with-ids.js delete mode 100755 src/content-render/scripts/reconcile-filenames-with-ids.js create mode 100755 src/content-render/scripts/update-filepaths.ts diff --git a/package.json b/package.json index f4dd1e63a7..81de7a9faa 100644 --- a/package.json +++ b/package.json @@ -100,6 +100,7 @@ "unallowed-contributions": "tsx src/workflows/unallowed-contributions.ts", "update-data-and-image-paths": "tsx src/early-access/scripts/update-data-and-image-paths.ts", "update-enterprise-dates": "tsx src/ghes-releases/scripts/update-enterprise-dates.ts", + "update-filepaths": "tsx src/content-render/scripts/update-filepaths.ts", "update-internal-links": "tsx src/links/scripts/update-internal-links.ts", "validate-asset-images": "tsx src/assets/scripts/validate-asset-images.ts", "validate-github-github-docs-urls": "tsx src/links/scripts/validate-github-github-docs-urls/index.ts", diff --git a/src/content-render/scripts/move-content.js b/src/content-render/scripts/move-content.js index ed9a0adb6f..aa49550b89 100755 --- a/src/content-render/scripts/move-content.js +++ b/src/content-render/scripts/move-content.js @@ -172,6 +172,9 @@ async function main(opts, nameTuple) { // the file is a folder or not. It just needs to know the old and new hrefs. changeFeaturedLinks(oldHref, newHref) + // Update any links in ChildGroups on the homepage. + changeHomepageLinks(oldHref, newHref, verbose) + if (!undo) { if (verbose) { console.log( @@ -581,6 +584,23 @@ function changeLearningTracks(filePath, oldHref, newHref) { fs.writeFileSync(filePath, newContent, 'utf-8') } +function changeHomepageLinks(oldHref, newHref, verbose) { + // Can't deserialize and serialize the Yaml because it would lose + // formatting and comments. So regex replace it. + // Homepage childGroup links do not have a leading '/', so we need to remove that. + const homepageOldHref = oldHref.replace('/', '') + const homepageNewHref = newHref.replace('/', '') + const escapedHomepageOldHref = escapeStringRegexp(homepageOldHref) + const regex = new RegExp(`- ${escapedHomepageOldHref}$`, 'gm') + const homepage = path.join(CONTENT_ROOT, 'index.md') + const oldContent = fs.readFileSync(homepage, 'utf-8') + const newContent = oldContent.replace(regex, `- ${homepageNewHref}`) + if (oldContent !== newContent) { + fs.writeFileSync(homepage, newContent, 'utf-8') + if (verbose) console.log(`Updated homepage links`) + } +} + function changeFeaturedLinks(oldHref, newHref) { const allFiles = walk(CONTENT_ROOT, { globs: ['**/*.md'], @@ -588,7 +608,7 @@ function changeFeaturedLinks(oldHref, newHref) { directories: false, }).filter((file) => !file.includes('README.md')) - const regex = new RegExp(`(^|%})${escapeStringRegexp(oldHref)}($|{%)`) + const regex = new RegExp(`(^|%} )${escapeStringRegexp(oldHref)}($| {%)`) for (const file of allFiles) { let changed = false diff --git a/src/content-render/scripts/reconcile-category-dirs-with-ids.js b/src/content-render/scripts/reconcile-category-dirs-with-ids.js deleted file mode 100755 index 244ce53770..0000000000 --- a/src/content-render/scripts/reconcile-category-dirs-with-ids.js +++ /dev/null @@ -1,110 +0,0 @@ -// [start-readme] -// -// This script will say which category pages needs to be renamed -// so they match their respective titles (from the front matter) -// -// [end-readme] - -import fs from 'fs' -import path from 'path' -import assert from 'node:assert/strict' - -import walk from 'walk-sync' -import chalk from 'chalk' -import GithubSlugger from 'github-slugger' -import { decode } from 'html-entities' - -import frontmatter from '@/frame/lib/read-frontmatter' -import { renderContent } from '@/content-render/index' -import { allVersions } from '@/versions/lib/all-versions' -import { ROOT } from '@/frame/lib/constants' - -const slugger = new GithubSlugger() - -const contentDir = path.join(ROOT, 'content') - -const INCLUDE_SUBCATEGORIES = Boolean(JSON.parse(process.env.INCLUDE_SUBCATEGORIES || 'false')) - -main() - -async function main() { - const englishCategoryIndices = getEnglishCategoryIndices().filter((name) => { - return INCLUDE_SUBCATEGORIES || name.split(path.sep).length < 5 - }) - - const shouldRename = [] - - for (const categoryIndex of englishCategoryIndices) { - const contents = fs.readFileSync(categoryIndex, 'utf8') - const { data } = frontmatter(contents) - - if (data.allowTitleToDifferFromFilename) { - continue - } - - const categoryDirPath = path.dirname(categoryIndex) - const categoryDirName = path.basename(categoryDirPath) - - const currentVersionObj = allVersions['free-pro-team@latest'] - assert(currentVersionObj, "No current version found for 'free-pro-team@latest'") - const context = { - currentLanguage: 'en', - currentVersionObj, - } - const title = await renderContent(data.title, context, { textOnly: true }) - slugger.reset() - const expectedSlugs = [slugger.slug(decode(title))] - const shortTitle = data.shortTitle - ? await renderContent(data.shortTitle, context, { textOnly: true }) - : '' - if (shortTitle && shortTitle !== title) { - expectedSlugs.push(slugger.slug(decode(shortTitle))) - } - - // If the directory name already matches the expected slug, bail out now - if (expectedSlugs.includes(categoryDirName)) continue - - // Figure out the new path for the category - const categoryDirParentDir = path.dirname(categoryDirPath) - const newPath = path.join(categoryDirParentDir, expectedSlugs.at(-1)) - - const oldRelativePath = path.relative(ROOT, categoryDirPath) - const newRelativePath = path.relative(ROOT, newPath) - shouldRename.push({ oldRelativePath, newRelativePath }) - } - - if (shouldRename.length > 0) { - console.log( - chalk.yellow( - `${shouldRename.length} ${ - shouldRename.length === 1 ? 'category' : 'categories' - } need to be renamed because their title doesn't match their directory name.`, - ), - ) - console.log(chalk.dim('Run the following commands to rename them:')) - - for (const { oldRelativePath, newRelativePath } of shouldRename) { - console.log( - `./src/content-render/scripts/move-content.js ${oldRelativePath} ${newRelativePath}`, - ) - } - } else { - console.log(chalk.green('No categories need to be renamed! 🎉')) - } -} - -function getEnglishCategoryIndices() { - const walkOptions = { - globs: ['*/*/**/index.md'], - ignore: [ - '{rest,graphql,developers}/**', - 'enterprise/admin/index.md', - '**/articles/**', - '**/early-access/**', - ], - directories: false, - includeBasePath: true, - } - - return walk(contentDir, walkOptions) -} diff --git a/src/content-render/scripts/reconcile-filenames-with-ids.js b/src/content-render/scripts/reconcile-filenames-with-ids.js deleted file mode 100755 index 94796101fc..0000000000 --- a/src/content-render/scripts/reconcile-filenames-with-ids.js +++ /dev/null @@ -1,80 +0,0 @@ -// [start-readme] -// -// An automated test checks for discrepancies between filenames and [autogenerated heading IDs](https://www.npmjs.com/package/remark-autolink-headings). -// If the test fails, a human needs to run this script to update the filenames. -// -// **This script is not currently supported on Windows.** -// -// [end-readme] - -import fs from 'fs' -import path from 'path' -import walk from 'walk-sync' -import GithubSlugger from 'github-slugger' -import { decode } from 'html-entities' -import frontmatter from '@/frame/lib/read-frontmatter' -import { execFileSync } from 'child_process' -import addRedirectToFrontmatter from '@/redirects/scripts/helpers/add-redirect-to-frontmatter' - -const slugger = new GithubSlugger() - -const contentDir = path.join(process.cwd(), 'content') - -const contentFiles = walk(contentDir, { includeBasePath: true, directories: false }).filter( - (file) => { - return file.endsWith('.md') && !file.endsWith('index.md') && !file.includes('README') - }, -) - -// TODO fix path separators in the redirect -if (process.platform.startsWith('win')) { - console.log('This script cannot be run on Windows at this time! Exiting...') - process.exit() -} - -contentFiles.forEach((oldFullPath) => { - const { data, content } = frontmatter(fs.readFileSync(oldFullPath, 'utf8')) - - // skip pages with frontmatter flag - if (data.allowTitleToDifferFromFilename) return - - // Slugify the title of each article, where: - // * title = Foo bar - // * slug = foo-bar - // Also allow for the slugified shortTitle to match the filename. - slugger.reset() - const slugTitle = slugger.slug(decode(data.title)) - const slugShortTitle = slugger.slug(decode(data.shortTitle)) - const allowedSlugs = [slugTitle, slugShortTitle] - - // get the basename of each file - // where file = content/foo-bar.md - // and basename = foo-bar - const basename = path.basename(oldFullPath, '.md') - - // If the basename is one of the allowed slugs, we're all set here. - if (allowedSlugs.includes(basename)) return - - // otherwise rename the file using the slug - const newFullPath = oldFullPath.replace(basename, slugShortTitle || slugTitle) - - const oldContentPath = path.relative(process.cwd(), oldFullPath) - const newContentPath = path.relative(process.cwd(), newFullPath) - - const gitStatusOfFile = execFileSync('git', ['status', '--porcelain', oldContentPath]).toString() - - // if file is untracked, do a regular mv; otherwise do a git mv - if (gitStatusOfFile.includes('??')) { - execFileSync('mv', [oldContentPath, newContentPath]) - } else { - execFileSync('git', ['mv', oldContentPath, newContentPath]) - } - - // then add the old path to the redirect_from frontmatter - // TODO fix path separators on Windows (e.g. \github\extending-github\about-webhooks) - const redirect = path.join('/', path.relative(contentDir, oldFullPath).replace(/.md$/, '')) - data.redirect_from = addRedirectToFrontmatter(data.redirect_from, redirect) - - // update the file - fs.writeFileSync(newFullPath, frontmatter.stringify(content, data)) -}) diff --git a/src/content-render/scripts/update-filepaths.ts b/src/content-render/scripts/update-filepaths.ts new file mode 100755 index 0000000000..c2acef0ba3 --- /dev/null +++ b/src/content-render/scripts/update-filepaths.ts @@ -0,0 +1,269 @@ +// [start-readme] +// +// Run this script to update filepaths to match short titles (or titles as a fallback). +// Use +// npm run-script -- update-filepaths --help +// +// [end-readme] + +import fs from 'fs' +import path from 'path' +import { program } from 'commander' +import GithubSlugger from 'github-slugger' +import { decode } from 'html-entities' +import { execFileSync } from 'child_process' +import walkFiles from '@/workflows/walk-files' +import frontmatter from '@/frame/lib/read-frontmatter' +import { renderContent } from '@/content-render/index' +import fpt from '@/versions/lib/non-enterprise-default-version' +import { allVersions } from '@/versions/lib/all-versions' +import type { PageFrontmatter, Context } from '@/types' + +interface ScriptOptions { + force?: boolean + excludeDirs?: boolean + paths?: string[] + dryRun?: boolean + verbose: boolean +} + +const context: Context = { + currentLanguage: 'en', + currentVersionObj: allVersions[fpt], +} + +program + .description( + 'Update filepaths to match short titles, unless frontmatter override is present. Processes both files and directories by default.', + ) + .option('-f, --force', 'Update paths even if frontmatter override is present') + .option('-e, --exclude-dirs', 'Exclude directories') + .option( + '-p, --paths [paths...]', + `One or more specific paths to process (e.g., copilot or content/copilot/how-tos/file.md)`, + ) + .option('-d, --dry-run', 'Preview changes without actually making them') + .option('-v, --verbose', 'Verbose') + .parse(process.argv) + +const options: ScriptOptions = program.opts() + +const isDirectoryCheck = (file: string): boolean => file.endsWith('index.md') + +// The script takes about 2 seconds per file, so divide by 30 instead of 60 to get the minutes. +const estimateScriptMinutes = (numberOfFiles: number): string => { + const estNum = Math.round(numberOfFiles / 30) + return estNum === 0 ? '<1' : estNum.toString() +} + +async function main(): Promise { + const slugger = new GithubSlugger() + const contentDir: string = path.join(process.cwd(), 'content') + // Filter to get all the content files we want to read in. + // Then sort them from longest > shortest so we can do the file moves in order. + const filesToProcess: string[] = sortFiles(filterFiles(contentDir, options)) + + if (filesToProcess.length === 0) { + console.log('No files to process') + return + } + + if (!options.dryRun) { + const estimate = estimateScriptMinutes(filesToProcess.length) + console.log(`Processing ${filesToProcess.length} files`) + console.log(`Estimated time: ${estimate} min\n`) + } + + // Process files sequentially to maintain the correct order of operations. + // Files must be moved before directories, and directories must be moved + // from deepest to shallowest to avoid path conflicts during the move operations. + // The result is rather slow, but an asynchronous approach that ensures + // sequential processing would not be faster. + for (const file of filesToProcess) { + try { + slugger.reset() + + const result = await processFile(file, slugger, options) + if (!result) continue + + moveFile(result, options) + } catch (error) { + console.error(`Failed to process ${file}:`, error) + } + } +} + +async function processFile( + file: string, + slugger: GithubSlugger, + options: ScriptOptions, +): Promise { + const { data } = frontmatter(fs.readFileSync(file, 'utf8')) as unknown as { + data: PageFrontmatter + } + + const isDirectory = isDirectoryCheck(file) + + // Assess the frontmatter and other conditions to determine if we want to process the path. + const processPage: boolean = determineProcessStatus(data, isDirectory, options) + if (!processPage) return null + + let stringToSlugify: string = data.shortTitle || data.title + + // Check if we need to process Liquid + if (stringToSlugify.includes('{%')) { + stringToSlugify = await renderContent(stringToSlugify, context, { textOnly: true }) + } + + // Slugify the short title of each article. + // Where: shortTitle = Foo bar + // Returns: slug = foo-bar + // Fall back to title if shortTitle doesn't exist. + const slug: string = slugger.slug(decode(stringToSlugify)) + + // Get the basename, depending on whether it's a file or dir. + let basename: string + if (isDirectory) { + // Where: content location = content/foobar/index.md + // Returns: basename = foobar + basename = path.basename(path.dirname(file)) + } else { + // Where: content location = content/foobar.md + // Returns: basename = foobar + basename = path.basename(file, '.md') + } + + // If slug and basename already match, all set here. Return early. + if (slug === basename) return null + + // Build the new path based on file type. + const newPath = isDirectory + ? path.join(path.dirname(path.dirname(file)), slug, 'index.md') + : path.join(path.dirname(file), `${slug}.md`) + + // Get relative paths and adjust for directories. + const getContentPath = (filePath: string): string => { + const relativePath = path.relative(process.cwd(), filePath) + return isDirectory ? path.dirname(relativePath) : relativePath + } + + const contentPath = getContentPath(file) + const newContentPath = getContentPath(newPath) + + return [contentPath, newContentPath] +} + +function moveFile(result: string[], options: ScriptOptions): void { + const [contentPath, newContentPath] = result + + if (options.dryRun) { + console.log('Move:\n', contentPath, '\nto:\n', newContentPath, '\n') + return + } + + // Call out to well-tested move-content script for the moving and redirect adding functions. + const stdout = execFileSync( + 'tsx', + [ + 'src/content-render/scripts/move-content.js', + '--no-git', + '--verbose', + contentPath, + newContentPath, + ], + { encoding: 'utf8' }, + ) + + // Grab just the "Moving..." and "Renamed..." output from stdout; otherwise output is too noisy. + const moveMsg = stdout.split('\n').find((l) => l.startsWith('Moving') || l.startsWith('Renamed')) + if (moveMsg && !options.verbose) { + console.log(moveMsg, '\n') + } else { + console.log(stdout, '\n') + } +} + +function sortFiles(filesArray: string[]): string[] { + // The order of operations is important. + // We need to return an array so that the moving operations happens in this order: + // 1. Filepaths + // 2. Deepest subdirectory path + // 3. Shallowest subdirectory path (up to category level, e.g., content/product/category) + return filesArray.toSorted((a, b) => { + // If A is a file and B is a directory, A comes first (negative) + if (!isDirectoryCheck(a) && isDirectoryCheck(b)) { + return -1 + } + // If A is a directory and B is a file, B comes first (positive) + if (isDirectoryCheck(a) && !isDirectoryCheck(b)) { + return 1 + } + // If A and B are both files, neutral + if (!isDirectoryCheck(a) && !isDirectoryCheck(b)) { + return 0 + } + // If both are directories, sort by depth (deepest first) + if (isDirectoryCheck(a) && isDirectoryCheck(b)) { + const aDepth = a.split(path.sep).length + const bDepth = b.split(path.sep).length + return bDepth - aDepth // Deeper paths first + } + + // This should never be reached, but return 0 for safety + return 0 + }) +} + +function filterFiles(contentDir: string, options: ScriptOptions) { + return walkFiles(contentDir, ['.md']).filter((file: string) => { + // Never move readmes + if (file.endsWith('README.md')) return false + // Never move early access files + if (file.includes('early-access')) return false + // Never move the homepage (content/index.md) + if (path.relative(contentDir, file) === 'index.md') return false + // Never move product landings (content/foo/index.md) + if (path.relative(contentDir, file).split(path.sep)[1] === 'index.md') return false + + // If no specific paths are passed, we are done filtering. + if (!options.paths) return true + + return options.paths.some((p: string) => { + // Allow either a full content path like "content/foo/bar.md" + // or a top-level directory name like "copilot" + if (!p.startsWith('content')) { + p = path.join('content', p) + } + if (!fs.existsSync(p)) { + console.error(`${p} not found`) + process.exit(1) + } + if (path.relative(process.cwd(), file).startsWith(p)) return true + return false + }) + }) +} + +function determineProcessStatus( + data: PageFrontmatter, + isDirectory: boolean, + options: ScriptOptions, +): boolean { + // Assess the conditions in this order: + // If it's a directory AND we're excluding dirs, do not process it no matter what. + if (isDirectory && options.excludeDirs) { + return false + } + // If the force option is passed, process it no matter what. + if (options.force) { + return true + } + // If the page has the override set, do not process it. + if (data.allowTitleToDifferFromFilename) { + return false + } + // In all other cases, process it. + return true +} + +main() diff --git a/src/frame/tests/pages.js b/src/frame/tests/pages.js index b5a1b1d0a2..1f3eee1cf5 100644 --- a/src/frame/tests/pages.js +++ b/src/frame/tests/pages.js @@ -119,7 +119,7 @@ describe('pages module', () => { nonMatches.length === 1 ? 'file' : 'files' } that do not match their slugified titles.\n ${nonMatches.join('\n')}\n - To fix, run src/content-render/scripts/reconcile-filenames-with-ids.js\n\n` + To fix, run: npm run-script update-filepaths --paths [FILEPATHS]\n\n` expect(nonMatches.length, message).toBe(0) })