1
0
mirror of synced 2025-12-19 18:10:59 -05:00

Script to update filepaths to match short titles (#56749)

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Sarah Schneider
2025-07-23 09:30:18 -04:00
committed by GitHub
parent 68355b8d2b
commit 94b56e8c6b
6 changed files with 292 additions and 192 deletions

View File

@@ -100,6 +100,7 @@
"unallowed-contributions": "tsx src/workflows/unallowed-contributions.ts",
"update-data-and-image-paths": "tsx src/early-access/scripts/update-data-and-image-paths.ts",
"update-enterprise-dates": "tsx src/ghes-releases/scripts/update-enterprise-dates.ts",
"update-filepaths": "tsx src/content-render/scripts/update-filepaths.ts",
"update-internal-links": "tsx src/links/scripts/update-internal-links.ts",
"validate-asset-images": "tsx src/assets/scripts/validate-asset-images.ts",
"validate-github-github-docs-urls": "tsx src/links/scripts/validate-github-github-docs-urls/index.ts",

View File

@@ -172,6 +172,9 @@ async function main(opts, nameTuple) {
// the file is a folder or not. It just needs to know the old and new hrefs.
changeFeaturedLinks(oldHref, newHref)
// Update any links in ChildGroups on the homepage.
changeHomepageLinks(oldHref, newHref, verbose)
if (!undo) {
if (verbose) {
console.log(
@@ -581,6 +584,23 @@ function changeLearningTracks(filePath, oldHref, newHref) {
fs.writeFileSync(filePath, newContent, 'utf-8')
}
/**
 * Rewrite a homepage link entry in content/index.md from the old href to the new one.
 *
 * @param oldHref Href being moved away from (normally with a leading '/').
 * @param newHref Href being moved to (normally with a leading '/').
 * @param verbose When truthy, log a message if the homepage file was changed.
 */
function changeHomepageLinks(oldHref, newHref, verbose) {
  // Can't deserialize and serialize the Yaml because it would lose
  // formatting and comments. So regex replace it.
  // Homepage childGroup links do not have a leading '/', so we need to remove that.
  // The pattern is anchored so that an href which already lacks the leading
  // slash keeps its interior separators intact.
  const homepageOldHref = oldHref.replace(/^\//, '')
  const homepageNewHref = newHref.replace(/^\//, '')
  const escapedHomepageOldHref = escapeStringRegexp(homepageOldHref)
  // Match a list item that ends the line with exactly the old href.
  const regex = new RegExp(`- ${escapedHomepageOldHref}$`, 'gm')
  const homepage = path.join(CONTENT_ROOT, 'index.md')
  const oldContent = fs.readFileSync(homepage, 'utf-8')
  // Use a replacer function so any `$` sequences in the new href are inserted
  // literally instead of being read as replacement patterns.
  const newContent = oldContent.replace(regex, () => `- ${homepageNewHref}`)
  // Only touch the file when something actually changed.
  if (oldContent !== newContent) {
    fs.writeFileSync(homepage, newContent, 'utf-8')
    if (verbose) console.log(`Updated homepage links`)
  }
}
function changeFeaturedLinks(oldHref, newHref) {
const allFiles = walk(CONTENT_ROOT, {
globs: ['**/*.md'],

View File

@@ -1,110 +0,0 @@
// [start-readme]
//
// This script will say which category pages need to be renamed
// so they match their respective titles (from the front matter)
//
// [end-readme]
import fs from 'fs'
import path from 'path'
import assert from 'node:assert/strict'
import walk from 'walk-sync'
import chalk from 'chalk'
import GithubSlugger from 'github-slugger'
import { decode } from 'html-entities'
import frontmatter from '@/frame/lib/read-frontmatter'
import { renderContent } from '@/content-render/index'
import { allVersions } from '@/versions/lib/all-versions'
import { ROOT } from '@/frame/lib/constants'
// Slugger used by main() to turn rendered titles into directory-name slugs.
const slugger = new GithubSlugger()
// Directory that gets walked for category index.md files.
const contentDir = path.join(ROOT, 'content')
// Opt-in flag read from the environment; the value is parsed as JSON
// (so "true"/"false") and defaults to false when unset or empty.
const INCLUDE_SUBCATEGORIES = Boolean(JSON.parse(process.env.INCLUDE_SUBCATEGORIES || 'false'))
// Run the report. The function declaration below is hoisted, so calling it here is fine.
main()
/**
 * Report every category directory whose name matches neither the slugified
 * title nor the slugified shortTitle of its index.md, and print the
 * move-content command that would rename each one.
 *
 * Pages that set `allowTitleToDifferFromFilename` are skipped. Nothing is
 * renamed here; the function only prints to the console.
 */
async function main() {
  const englishCategoryIndices = getEnglishCategoryIndices().filter((name) => {
    // Measure depth relative to the content directory so the check does not
    // depend on how deep the repository root itself is on disk.
    // product/category/index.md is 3 segments; anything deeper is a subcategory.
    const depth = path.relative(contentDir, name).split(path.sep).length
    return INCLUDE_SUBCATEGORIES || depth < 4
  })

  // The rendering context is the same for every page, so build it once.
  const currentVersionObj = allVersions['free-pro-team@latest']
  assert(currentVersionObj, "No current version found for 'free-pro-team@latest'")
  const context = {
    currentLanguage: 'en',
    currentVersionObj,
  }

  const shouldRename = []

  for (const categoryIndex of englishCategoryIndices) {
    const contents = fs.readFileSync(categoryIndex, 'utf8')
    const { data } = frontmatter(contents)

    if (data.allowTitleToDifferFromFilename) {
      continue
    }

    const categoryDirPath = path.dirname(categoryIndex)
    const categoryDirName = path.basename(categoryDirPath)

    const title = await renderContent(data.title, context, { textOnly: true })
    slugger.reset()
    const expectedSlugs = [slugger.slug(decode(title))]
    const shortTitle = data.shortTitle
      ? await renderContent(data.shortTitle, context, { textOnly: true })
      : ''
    if (shortTitle && shortTitle !== title) {
      // The slugger de-duplicates repeated slugs by appending "-1", "-2", ...
      // Reset it so a shortTitle that slugifies the same as the title
      // (e.g. differing only in case) is not given a bogus suffix.
      slugger.reset()
      expectedSlugs.push(slugger.slug(decode(shortTitle)))
    }

    // If the directory name already matches the expected slug, bail out now
    if (expectedSlugs.includes(categoryDirName)) continue

    // Figure out the new path for the category.
    // The last expected slug is the shortTitle's when there is one, else the title's.
    const categoryDirParentDir = path.dirname(categoryDirPath)
    const newPath = path.join(categoryDirParentDir, expectedSlugs.at(-1))

    const oldRelativePath = path.relative(ROOT, categoryDirPath)
    const newRelativePath = path.relative(ROOT, newPath)
    shouldRename.push({ oldRelativePath, newRelativePath })
  }

  if (shouldRename.length > 0) {
    console.log(
      chalk.yellow(
        `${shouldRename.length} ${
          shouldRename.length === 1 ? 'category' : 'categories'
        } need to be renamed because their title doesn't match their directory name.`,
      ),
    )
    console.log(chalk.dim('Run the following commands to rename them:'))

    for (const { oldRelativePath, newRelativePath } of shouldRename) {
      console.log(
        `./src/content-render/scripts/move-content.js ${oldRelativePath} ${newRelativePath}`,
      )
    }
  } else {
    console.log(chalk.green('No categories need to be renamed! 🎉'))
  }
}
/**
 * Walk the content directory and return the index.md files found at least
 * two directory levels down, leaving out the ignored locations listed below.
 * Returned paths include the content directory as their base.
 */
function getEnglishCategoryIndices() {
  // Locations that are never reported.
  const ignoredGlobs = [
    '{rest,graphql,developers}/**',
    'enterprise/admin/index.md',
    '**/articles/**',
    '**/early-access/**',
  ]

  return walk(contentDir, {
    globs: ['*/*/**/index.md'],
    ignore: ignoredGlobs,
    directories: false,
    includeBasePath: true,
  })
}

View File

@@ -1,80 +0,0 @@
// [start-readme]
//
// An automated test checks for discrepancies between filenames and [autogenerated heading IDs](https://www.npmjs.com/package/remark-autolink-headings).
// If the test fails, a human needs to run this script to update the filenames.
//
// **This script is not currently supported on Windows.**
//
// [end-readme]
import fs from 'fs'
import path from 'path'
import walk from 'walk-sync'
import GithubSlugger from 'github-slugger'
import { decode } from 'html-entities'
import frontmatter from '@/frame/lib/read-frontmatter'
import { execFileSync } from 'child_process'
import addRedirectToFrontmatter from '@/redirects/scripts/helpers/add-redirect-to-frontmatter'
// Slugger shared by the rename loop below.
const slugger = new GithubSlugger()

// Content root, resolved against the directory the script is run from.
const contentDir = path.join(process.cwd(), 'content')

// Every Markdown article under the content root: index pages and anything
// with README in its path are left out.
const contentFiles = walk(contentDir, { includeBasePath: true, directories: false }).filter(
  (file) => file.endsWith('.md') && !(file.endsWith('index.md') || file.includes('README')),
)

// TODO fix path separators in the redirect
// Until then, bail out early on Windows.
if (process.platform.startsWith('win')) {
  console.log('This script cannot be run on Windows at this time! Exiting...')
  process.exit()
}
// Rename every article whose filename matches neither its slugified title nor
// its slugified shortTitle, then record the old location in redirect_from.
contentFiles.forEach((oldFullPath) => {
  const { data, content } = frontmatter(fs.readFileSync(oldFullPath, 'utf8'))

  // skip pages with frontmatter flag
  if (data.allowTitleToDifferFromFilename) return

  // Slugify the title of each article, where:
  // * title = Foo bar
  // * slug = foo-bar
  // Also allow for the slugified shortTitle to match the filename.
  // The slugger de-duplicates repeated slugs by appending "-1", "-2", ...,
  // so it is reset before each call; otherwise a shortTitle that slugifies
  // the same as the title would come back as "foo-bar-1".
  slugger.reset()
  const slugTitle = slugger.slug(decode(data.title))
  slugger.reset()
  const slugShortTitle = data.shortTitle ? slugger.slug(decode(data.shortTitle)) : ''
  const allowedSlugs = [slugTitle, slugShortTitle]

  // get the basename of each file
  // where file = content/foo-bar.md
  // and basename = foo-bar
  const basename = path.basename(oldFullPath, '.md')

  // If the basename is one of the allowed slugs, we're all set here.
  if (allowedSlugs.includes(basename)) return

  // otherwise rename the file using the slug.
  // Build the new path from the directory plus the new filename so that only
  // the filename changes, even if a parent directory contains the same text.
  const newFullPath = path.join(path.dirname(oldFullPath), `${slugShortTitle || slugTitle}.md`)

  const oldContentPath = path.relative(process.cwd(), oldFullPath)
  const newContentPath = path.relative(process.cwd(), newFullPath)

  const gitStatusOfFile = execFileSync('git', ['status', '--porcelain', oldContentPath]).toString()

  // if file is untracked, do a regular mv; otherwise do a git mv
  if (gitStatusOfFile.includes('??')) {
    execFileSync('mv', [oldContentPath, newContentPath])
  } else {
    execFileSync('git', ['mv', oldContentPath, newContentPath])
  }

  // then add the old path to the redirect_from frontmatter
  // TODO fix path separators on Windows (e.g. \github\extending-github\about-webhooks)
  // The dot is escaped so that only a literal ".md" extension is stripped.
  const redirect = path.join('/', path.relative(contentDir, oldFullPath).replace(/\.md$/, ''))
  data.redirect_from = addRedirectToFrontmatter(data.redirect_from, redirect)

  // update the file
  fs.writeFileSync(newFullPath, frontmatter.stringify(content, data))
})

View File

@@ -0,0 +1,269 @@
// [start-readme]
//
// Run this script to update filepaths to match short titles (or titles as a fallback).
// Use
// npm run-script -- update-filepaths --help
//
// [end-readme]
import fs from 'fs'
import path from 'path'
import { program } from 'commander'
import GithubSlugger from 'github-slugger'
import { decode } from 'html-entities'
import { execFileSync } from 'child_process'
import walkFiles from '@/workflows/walk-files'
import frontmatter from '@/frame/lib/read-frontmatter'
import { renderContent } from '@/content-render/index'
import fpt from '@/versions/lib/non-enterprise-default-version'
import { allVersions } from '@/versions/lib/all-versions'
import type { PageFrontmatter, Context } from '@/types'
/**
 * Options parsed from the command line.
 * Every flag is optional: a boolean flag that is not passed is left undefined.
 */
interface ScriptOptions {
  /** Process pages even when their frontmatter sets `allowTitleToDifferFromFilename`. */
  force?: boolean
  /** Skip directories (index.md pages) and only process article files. */
  excludeDirs?: boolean
  /** Limit processing to these paths; when omitted, all eligible content is processed. */
  paths?: string[]
  /** Print the moves that would happen without performing them. */
  dryRun?: boolean
  /** Print the full output of each move instead of a one-line summary. */
  verbose?: boolean
}
// Rendering context handed to renderContent when a title contains Liquid:
// English, with the version object looked up under the imported `fpt` key.
const context: Context = {
currentLanguage: 'en',
currentVersionObj: allVersions[fpt],
}
// Declare the command-line interface and parse the arguments of this process.
program
.description(
'Update filepaths to match short titles, unless frontmatter override is present. Processes both files and directories by default.',
)
.option('-f, --force', 'Update paths even if frontmatter override is present')
.option('-e, --exclude-dirs', 'Exclude directories')
.option(
'-p, --paths [paths...]',
`One or more specific paths to process (e.g., copilot or content/copilot/how-tos/file.md)`,
)
.option('-d, --dry-run', 'Preview changes without actually making them')
.option('-v, --verbose', 'Verbose')
.parse(process.argv)
// Parsed option values; read by main() and passed on to the helpers below.
const options: ScriptOptions = program.opts()
/**
 * Whether a content file stands for a directory, i.e. it is that directory's index.md.
 * Compares the whole filename so that articles whose name merely ends in
 * "index.md" (for example reindex.md) are still treated as regular files.
 */
const isDirectoryCheck = (file: string): boolean => path.basename(file) === 'index.md'
// At roughly 2 seconds per file, 30 files take about a minute, so the
// estimate in minutes is the file count divided by 30 (not 60).
// Anything that rounds down to zero is reported as "<1".
const estimateScriptMinutes = (numberOfFiles: number): string => {
  const minutes = Math.round(numberOfFiles / 30)
  if (minutes === 0) {
    return '<1'
  }
  return String(minutes)
}
/**
 * Entry point: collect the content files to consider, then rename each file
 * or directory whose path does not match its slugified short title.
 *
 * A failure on one file is logged and does not stop the run, but it does
 * make the process exit with a non-zero status once the run is over.
 */
async function main(): Promise<void> {
  const slugger = new GithubSlugger()
  const contentDir: string = path.join(process.cwd(), 'content')

  // Filter to get all the content files we want to read in.
  // Then sort them so the moves happen in a safe order (files first,
  // then directories from deepest to shallowest).
  const filesToProcess: string[] = sortFiles(filterFiles(contentDir, options))

  if (filesToProcess.length === 0) {
    console.log('No files to process')
    return
  }

  if (!options.dryRun) {
    const estimate = estimateScriptMinutes(filesToProcess.length)
    console.log(`Processing ${filesToProcess.length} files`)
    console.log(`Estimated time: ${estimate} min\n`)
  }

  // Process files sequentially to maintain the correct order of operations.
  // Files must be moved before directories, and directories must be moved
  // from deepest to shallowest to avoid path conflicts during the move operations.
  // The result is rather slow, but an asynchronous approach that ensures
  // sequential processing would not be faster.
  let failureCount = 0
  for (const file of filesToProcess) {
    try {
      // Start every file with a fresh slugger so slugs from earlier files
      // cannot influence this one.
      slugger.reset()
      const result = await processFile(file, slugger, options)
      if (!result) continue
      moveFile(result, options)
    } catch (error) {
      failureCount += 1
      console.error(`Failed to process ${file}:`, error)
    }
  }

  // Keep going on individual failures, but do not report overall success.
  if (failureCount > 0) {
    console.error(`${failureCount} of ${filesToProcess.length} files could not be processed`)
    process.exitCode = 1
  }
}
/**
 * Work out whether a content file (or the directory its index.md stands for)
 * needs to be renamed to match its slugified short title.
 *
 * @param file Path to the Markdown file to inspect.
 * @param slugger Slugger used to turn the title into a slug.
 * @param options Parsed command-line options.
 * @returns `[currentPath, newPath]`, both relative to the working directory
 *   (directory paths for index.md files), or `null` when nothing needs to move.
 * @throws Error when the page has no title to slugify or the title yields an empty slug.
 */
async function processFile(
  file: string,
  slugger: GithubSlugger,
  options: ScriptOptions,
): Promise<string[] | null> {
  const { data } = frontmatter(fs.readFileSync(file, 'utf8')) as unknown as {
    data: PageFrontmatter
  }

  const isDirectory = isDirectoryCheck(file)

  // Assess the frontmatter and other conditions to determine if we want to process the path.
  const processPage: boolean = determineProcessStatus(data, isDirectory, options)
  if (!processPage) return null

  // Prefer the short title; fall back to title if shortTitle doesn't exist.
  let stringToSlugify: string = data.shortTitle || data.title
  if (!stringToSlugify) {
    throw new Error('No title or shortTitle found in frontmatter')
  }

  // Check if we need to process Liquid
  if (stringToSlugify.includes('{%')) {
    stringToSlugify = await renderContent(stringToSlugify, context, { textOnly: true })
  }

  // Slugify the short title of each article.
  // Where: shortTitle = Foo bar
  // Returns: slug = foo-bar
  const slug: string = slugger.slug(decode(stringToSlugify))

  // An empty slug would make the new path resolve to the parent directory
  // (or to a file named ".md"), so refuse to continue.
  if (!slug) {
    throw new Error(`Title "${stringToSlugify}" produced an empty slug`)
  }

  // Get the basename, depending on whether it's a file or dir.
  let basename: string
  if (isDirectory) {
    // Where: content location = content/foobar/index.md
    // Returns: basename = foobar
    basename = path.basename(path.dirname(file))
  } else {
    // Where: content location = content/foobar.md
    // Returns: basename = foobar
    basename = path.basename(file, '.md')
  }

  // If slug and basename already match, all set here. Return early.
  if (slug === basename) return null

  // Build the new path based on file type.
  const newPath = isDirectory
    ? path.join(path.dirname(path.dirname(file)), slug, 'index.md')
    : path.join(path.dirname(file), `${slug}.md`)

  // Get relative paths and adjust for directories:
  // for a directory, the path to move is the folder, not its index.md.
  const getContentPath = (filePath: string): string => {
    const relativePath = path.relative(process.cwd(), filePath)
    return isDirectory ? path.dirname(relativePath) : relativePath
  }

  const contentPath = getContentPath(file)
  const newContentPath = getContentPath(newPath)

  return [contentPath, newContentPath]
}
/**
 * Perform one move, or just print it when running in dry-run mode.
 *
 * @param result Pair of `[currentPath, newPath]`.
 * @param options Parsed command-line options (`dryRun` and `verbose` are read).
 */
function moveFile(result: string[], options: ScriptOptions): void {
  const [source, destination] = result

  // Dry run: describe the move and stop.
  if (options.dryRun) {
    console.log('Move:\n', source, '\nto:\n', destination, '\n')
    return
  }

  // Delegate the move and the redirect bookkeeping to the move-content script.
  const moveArgs = [
    'src/content-render/scripts/move-content.js',
    '--no-git',
    '--verbose',
    source,
    destination,
  ]
  const output = execFileSync('tsx', moveArgs, { encoding: 'utf8' })

  // The full output is noisy, so by default show only the first line that
  // starts with "Moving" or "Renamed". Show everything in verbose mode or
  // when no such line exists.
  const summaryLine = output
    .split('\n')
    .find((line) => line.startsWith('Moving') || line.startsWith('Renamed'))
  const showEverything = options.verbose || !summaryLine
  console.log(showEverything ? output : summaryLine, '\n')
}
/**
 * Return a sorted copy of the paths in the order the moves must happen:
 *   1. Article files, in their incoming relative order
 *   2. Directories (index.md pages), deepest path first
 *   3. ...down to the shallowest directories
 */
function sortFiles(filesArray: string[]): string[] {
  // Number of path segments, used as the depth of a directory's index.md.
  const depthOf = (filePath: string): number => filePath.split(path.sep).length

  return filesArray.toSorted((a, b) => {
    const aIsDir = isDirectoryCheck(a)
    const bIsDir = isDirectoryCheck(b)

    // One file and one directory: the file goes first.
    if (aIsDir !== bIsDir) {
      return aIsDir ? 1 : -1
    }

    // Two files: leave their order alone.
    if (!aIsDir) {
      return 0
    }

    // Two directories: the deeper path goes first.
    return depthOf(b) - depthOf(a)
  })
}
/**
 * List the Markdown files under the content directory that may be renamed.
 *
 * READMEs, early-access files, the homepage and product landing pages are
 * always excluded. When `options.paths` is given, only files that are one of
 * those paths, or that live inside one of those directories, are kept.
 * If a requested path does not exist, an error is printed and the process
 * exits with status 1.
 *
 * @param contentDir The content root to walk.
 * @param options Parsed command-line options (`paths` is read).
 * @returns The matching file paths as returned by the file walker.
 */
function filterFiles(contentDir: string, options: ScriptOptions) {
  // Normalize and validate the requested paths once, up front, instead of
  // re-checking the filesystem for every file that is walked.
  const requestedPaths = options.paths?.map((requested: string) => {
    // Allow either a full content path like "content/foo/bar.md"
    // or a top-level directory name like "copilot"
    let p = path.normalize(requested)
    if (p !== 'content' && !p.startsWith(`content${path.sep}`)) {
      p = path.join('content', p)
    }
    if (!fs.existsSync(p)) {
      console.error(`${p} not found`)
      process.exit(1)
    }
    // Drop trailing separators so "content/copilot/" and "content/copilot" match alike.
    return p.replace(/[\\/]+$/, '')
  })

  return walkFiles(contentDir, ['.md']).filter((file: string) => {
    // Never move readmes
    if (file.endsWith('README.md')) return false
    // Never move early access files
    if (file.includes('early-access')) return false

    const relativeToContent = path.relative(contentDir, file)
    // Never move the homepage (content/index.md)
    if (relativeToContent === 'index.md') return false
    // Never move product landings (content/foo/index.md)
    if (relativeToContent.split(path.sep)[1] === 'index.md') return false

    // If no specific paths are passed, we are done filtering.
    if (!requestedPaths) return true

    // Match on whole path segments: "content/foo" selects content/foo/** but
    // must not select a sibling such as content/foo-bar/**.
    const relativeToCwd = path.relative(process.cwd(), file)
    return requestedPaths.some(
      (p) => relativeToCwd === p || relativeToCwd.startsWith(`${p}${path.sep}`),
    )
  })
}
/**
 * Decide whether a page should be considered for renaming.
 *
 * Precedence, highest first:
 *   1. Directories are never processed when `excludeDirs` is set.
 *   2. `force` processes everything else, override or not.
 *   3. Otherwise the page is processed unless its frontmatter sets
 *      `allowTitleToDifferFromFilename`.
 *
 * @param data Frontmatter of the page.
 * @param isDirectory Whether the page is a directory's index.md.
 * @param options Parsed command-line options.
 */
function determineProcessStatus(
  data: PageFrontmatter,
  isDirectory: boolean,
  options: ScriptOptions,
): boolean {
  const excludedAsDirectory = isDirectory && options.excludeDirs
  if (excludedAsDirectory) return false

  if (options.force) return true

  // No exclusion and no force: the frontmatter override has the last word.
  return !data.allowTitleToDifferFromFilename
}
// Run the script. The returned promise is not awaited or caught here.
main()

View File

@@ -119,7 +119,7 @@ describe('pages module', () => {
nonMatches.length === 1 ? 'file' : 'files'
} that do not match their slugified titles.\n
${nonMatches.join('\n')}\n
To fix, run src/content-render/scripts/reconcile-filenames-with-ids.js\n\n`
To fix, run: npm run-script update-filepaths --paths [FILEPATHS]\n\n`
expect(nonMatches.length, message).toBe(0)
})