diff --git a/package.json b/package.json
index 81de7a9faa..75239089b0 100644
--- a/package.json
+++ b/package.json
@@ -16,6 +16,7 @@
   },
   "exports": "./src/frame/server.ts",
   "scripts": {
+    "add-content-type": "tsx src/content-render/scripts/add-content-type.ts",
     "ai-edit": "tsx src/ai-editors/scripts/ai-edit.ts",
     "all-documents": "tsx src/content-render/scripts/all-documents/cli.ts",
     "analyze-text": "tsx src/search/scripts/analyze-text.ts",
diff --git a/src/content-render/scripts/add-content-type.ts b/src/content-render/scripts/add-content-type.ts
new file mode 100644
index 0000000000..f74d5e2177
--- /dev/null
+++ b/src/content-render/scripts/add-content-type.ts
@@ -0,0 +1,222 @@
+// This script auto-populates the `contentType` frontmatter property based on
+// the directory location of the content file.
+// Run with:
+//   npm run-script -- add-content-type --help
+
+import fs from 'fs'
+import path from 'path'
+import { program } from 'commander'
+import frontmatter from '@/frame/lib/read-frontmatter'
+import walkFiles from '@/workflows/walk-files'
+import { contentTypesEnum } from '#src/frame/lib/frontmatter.js'
+import type { MarkdownFrontmatter } from '@/types'
+
+const RESPONSIBLE_USE_STRING = 'responsible-use'
+const HOMEPAGE_TYPE = 'homepage'
+const LANDING_TYPE = 'landing'
+const RAI_TYPE = 'rai'
+const OTHER_TYPE = 'other'
+
+interface ScriptOptions {
+  dryRun?: boolean
+  paths?: string[]
+  removeType?: boolean
+  verbose?: boolean
+}
+
+program
+  .description('Auto-populate the contentType frontmatter property based on file location')
+  .option(
+    '-p, --paths [paths...]',
+    'One or more specific paths to process (e.g., copilot or content/copilot/how-tos/file.md)',
+  )
+  .option('-r, --remove-type', `Remove the legacy 'type' frontmatter property if present`)
+  .option('-d, --dry-run', 'Preview changes without modifying files')
+  .option('-v, --verbose', 'Show detailed output of changes made')
+  .addHelpText(
+    'after',
+    `
+Possible contentType values:
+  ${contentTypesEnum.join(', ')}
+
+Examples:
+  npm run-script -- add-content-type // runs on all content files, does not remove legacy 'type' prop
+  npm run-script -- add-content-type --paths copilot actions --remove-type --dry-run
+  npm run-script -- add-content-type --paths content/copilot/how-tos
+  npm run-script -- add-content-type --verbose`,
+  )
+  .parse(process.argv)
+
+const options: ScriptOptions = program.opts()
+
+const contentDir = path.join(process.cwd(), 'content')
+
+// Resolve the `--paths` values to absolute paths, exiting with an error when the
+// option was passed without values or a requested path does not exist.
+// Returns undefined when no `--paths` filter was requested.
+function resolveRequestedPaths(paths: unknown): string[] | undefined {
+  if (paths === undefined) return undefined
+
+  // Commander reports `true` when `--paths` is passed without any values.
+  if (!Array.isArray(paths) || paths.length === 0) {
+    console.error('--paths requires at least one path')
+    process.exit(1)
+  }
+
+  return paths.map((requested: string) => {
+    // Allow either a full content path like "content/foo/bar.md"
+    // or a top-level directory name like "copilot"
+    const normalized = path.normalize(requested)
+    const isContentPath = normalized === 'content' || normalized.startsWith(`content${path.sep}`)
+    const relativeTarget = isContentPath ? normalized : path.join('content', normalized)
+    if (!fs.existsSync(relativeTarget)) {
+      console.error(`${relativeTarget} not found`)
+      process.exit(1)
+    }
+    // path.resolve() also drops any trailing separator, which keeps the prefix check exact.
+    return path.resolve(relativeTarget)
+  })
+}
+
+// Collects the Markdown files to process (optionally narrowed by `--paths`),
+// runs processFile() on each one, and prints a summary of how many were updated.
+async function main() {
+  const requestedPaths = resolveRequestedPaths(options.paths)
+
+  const filesToProcess: string[] = walkFiles(contentDir, ['.md']).filter((file: string) => {
+    if (file.endsWith('README.md')) return false
+    if (file.includes('early-access')) return false
+    if (!requestedPaths) return true
+
+    // Match on whole path segments so that "content/copilot" does not also
+    // select files under a sibling such as "content/copilot-extras".
+    const absoluteFile = path.resolve(file)
+    return requestedPaths.some(
+      (target) => absoluteFile === target || absoluteFile.startsWith(`${target}${path.sep}`),
+    )
+  })
+
+  let processedCount = 0
+  let updatedCount = 0
+
+  for (const filePath of filesToProcess) {
+    try {
+      const result = processFile(filePath, options)
+      if (result.processed) processedCount++
+      if (result.updated) updatedCount++
+    } catch (error) {
+      console.error(
+        `Error processing ${filePath}:`,
+        error instanceof Error ? error.message : String(error),
+      )
+    }
+  }
+
+  console.log(`\nUpdated ${updatedCount} files out of ${processedCount}`)
+}
+
+// Adds `contentType` to one file's frontmatter and, when requested, removes the
+// legacy `type` property. Returns whether the file was processed and whether it
+// was rewritten on disk (never in dry-run mode).
+function processFile(filePath: string, options: ScriptOptions) {
+  const fileContent = fs.readFileSync(filePath, 'utf8')
+  const relativePath = path.relative(contentDir, filePath)
+
+  const { data, content } = frontmatter(fileContent) as unknown as {
+    data: MarkdownFrontmatter & { contentType?: string }
+    content: string
+  }
+
+  if (!data) return { processed: false, updated: false }
+
+  // Remove the legacy type property if option is passed
+  const removeLegacyType = Boolean(options.removeType && data.type)
+  const needsContentType = !data.contentType
+
+  // Skip if contentType already exists and we're not removing legacy type
+  if (!needsContentType && !removeLegacyType) {
+    console.log(`contentType already set on ${relativePath}`)
+    return { processed: true, updated: false }
+  }
+
+  const newContentType = data.contentType || determineContentType(relativePath, data.type || '')
+
+  if (options.dryRun) {
+    console.log(`\n${relativePath}`)
+    if (needsContentType) {
+      console.log(`  ✅ Would set contentType: "${newContentType}"`)
+    }
+    if (removeLegacyType) {
+      console.log(`  ✂️ Would remove legacy type: "${data.type}"`)
+    }
+    return { processed: true, updated: false }
+  }
+
+  // Set the contentType property if it doesn't exist
+  if (needsContentType) {
+    data.contentType = newContentType
+  }
+
+  const legacyTypeValue = data.type
+  if (removeLegacyType) {
+    delete data.type
+  }
+
+  // Write the file back
+  fs.writeFileSync(filePath, frontmatter.stringify(content, data, { lineWidth: -1 } as any))
+
+  if (options.verbose) {
+    console.log(`\n${relativePath}`)
+    // Only report the contentType as set when this run actually added it.
+    if (needsContentType) {
+      console.log(`  ✅ Set contentType: "${newContentType}"`)
+    }
+    if (removeLegacyType) {
+      console.log(`  ✂️ Removed legacy type: "${legacyTypeValue}"`)
+    }
+  }
+
+  return { processed: true, updated: true }
+}
+
+// Derives the contentType for a file from its path relative to the content
+// directory, falling back to the legacy `type` value for RAI articles.
+function determineContentType(relativePath: string, legacyType: string): string {
+  // The split path array will be structured like:
+  // [ 'copilot', 'how-tos', 'troubleshoot', 'index.md' ]
+  // where the content type we want is in slot 1.
+  const pathSegments = relativePath.split(path.sep)
+
+  const topLevelDirectory = pathSegments[0]
+  // Slot 1 is missing for a file that sits directly in the content directory.
+  const derivedContentType = pathSegments[1] ?? ''
+
+  // There is only one content/index.md, and it's the homepage.
+  if (topLevelDirectory === 'index.md') return HOMEPAGE_TYPE
+
+  // SPECIAL HANDLING FOR RAI
+  // If the legacy type is 'rai', use it for the contentType.
+  // If a directory name includes a responsible-use string, assume the 'rai' type.
+  if (legacyType === RAI_TYPE || derivedContentType.includes(RESPONSIBLE_USE_STRING)) {
+    return RAI_TYPE
+  }
+
+  // When the content directory matches any of the allowed
+  // content type values (such as 'get-started',
+  // 'concepts', 'how-tos', 'reference', and 'tutorials'),
+  // immediately return it. We're satisfied.
+  if (contentTypesEnum.includes(derivedContentType)) {
+    return derivedContentType
+  }
+
+  // There is only one content/<product>/index.md file per doc set.
+  // This index.md is always a landing page.
+  if (derivedContentType === 'index.md') {
+    return LANDING_TYPE
+  }
+
+  // Classify anything else as 'other'.
+  return OTHER_TYPE
+}
+
+main().catch(console.error)
diff --git a/src/frame/lib/frontmatter.js b/src/frame/lib/frontmatter.js
index 9d12b70497..097e09fe3a 100644
--- a/src/frame/lib/frontmatter.js
+++ b/src/frame/lib/frontmatter.js
@@ -17,8 +17,24 @@ const layoutNames = [
   false,
 ]
 
+// DEPRECATED: Use 'contentType' instead of 'type' for new content.
+// 'type' exists on ~40% of files but is used only for internal analytics.
+// Migration tool: src/content-render/scripts/add-content-type.ts
 const guideTypes = ['overview', 'quick_start', 'tutorial', 'how_to', 'reference', 'rai']
 
+// As of July 2025, use 'contentType' rather than 'type'.
+export const contentTypesEnum = [
+  'get-started',
+  'concepts',
+  'how-tos',
+  'reference',
+  'tutorials',
+  'homepage', // Only applies to the sole 'content/index.md' file (the homepage).
+  'landing', // Only applies to 'content/<product>/index.md' files (product landings).
+  'rai', // Only applies to files that live in directories with 'responsible-use' in the name.
+  'other', // Everything else.
+]
+
 export const schema = {
   type: 'object',
   required: ['title', 'versions'],
@@ -150,10 +166,18 @@ export const schema = {
         prefix: { type: 'string' },
       },
     },
+    // DEPRECATED: Use 'contentType' instead of 'type' for new content.
+    // 'type' exists on ~40% of files but is used only for internal analytics.
+    // Migration tool: src/content-render/scripts/add-content-type.ts
     type: {
       type: 'string',
       enum: guideTypes,
     },
+    // As of July 2025, use 'contentType' rather than 'type'.
+    contentType: {
+      type: 'string',
+      enum: contentTypesEnum,
+    },
     topics: {
       type: 'array',
     },
diff --git a/src/frame/lib/page.ts b/src/frame/lib/page.ts
index 4530297786..5da0d76918 100644
--- a/src/frame/lib/page.ts
+++ b/src/frame/lib/page.ts
@@ -187,8 +187,12 @@ class Page {
 
   constructor(opts: PageReadResult) {
     if (opts.frontmatterErrors && opts.frontmatterErrors.length) {
+      console.error(
+        `${opts.frontmatterErrors.length} frontmatter errors trying to load ${opts.fullPath}:`,
+      )
+      console.error(opts.frontmatterErrors)
       throw new FrontmatterErrorsError(
-        `${opts.frontmatterErrors.length} frontmatter errors trying to load ${opts.fullPath}`,
+        `${opts.frontmatterErrors.length} frontmatter errors in ${opts.fullPath}`,
         opts.frontmatterErrors,
       )
     }
diff --git a/src/types.ts b/src/types.ts
index 1f78aa94c7..d624ab9ea5 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -471,4 +471,6 @@ export type MarkdownFrontmatter = {
   versions: FrontmatterVersions
   subcategory?: boolean
   hidden?: boolean
+  type?: string
+  contentType?: string
 }
diff --git a/src/workflows/fm-utils.ts b/src/workflows/fm-utils.ts
index 7a1d6c8fb9..b6d39e53ce 100644
--- a/src/workflows/fm-utils.ts
+++ b/src/workflows/fm-utils.ts
@@ -8,7 +8,7 @@ export function checkContentType(filePaths: string[], type: string) {
   const unallowedChangedFiles = []
   for (const filePath of filePaths) {
     const { data } = matter(readFileSync(filePath, 'utf8'))
-    if (data.type === type) {
+    if (data.type === type || data.contentType === type) {
       unallowedChangedFiles.push(filePath)
     }
   }
diff --git a/src/workflows/unallowed-contributions.ts b/src/workflows/unallowed-contributions.ts
index 39a8654052..526774aa0d 100755
--- a/src/workflows/unallowed-contributions.ts
+++ b/src/workflows/unallowed-contributions.ts
@@ -46,7 +46,7 @@ async function main() {
   const listUnallowedChangedFiles = unallowedChangedFiles.map((file) => `\n - ${file}`).join('')
   const listUnallowedFiles = filters.notAllowed.map((file: string) => `\n - ${file}`).join('')
 
-  const reviewMessage = `👋 Hey there spelunker. It looks like you've modified some files that we can't accept as contributions:${listUnallowedChangedFiles}\n\nYou'll need to revert all of the files you changed that match that list using [GitHub Desktop](https://docs.github.com/en/free-pro-team@latest/desktop/contributing-and-collaborating-using-github-desktop/managing-commits/reverting-a-commit-in-github-desktop) or \`git checkout origin/main \`. Once you get those files reverted, we can continue with the review process. :octocat:\n\nThe complete list of files we can't accept are:${listUnallowedFiles}\n\nWe also can't accept contributions to files in the content directory with frontmatter \`type: rai\`.`
+  const reviewMessage = `👋 Hey there spelunker. It looks like you've modified some files that we can't accept as contributions:${listUnallowedChangedFiles}\n\nYou'll need to revert all of the files you changed that match that list using [GitHub Desktop](https://docs.github.com/en/free-pro-team@latest/desktop/contributing-and-collaborating-using-github-desktop/managing-commits/reverting-a-commit-in-github-desktop) or \`git checkout origin/main \`. Once you get those files reverted, we can continue with the review process. :octocat:\n\nThe complete list of files we can't accept are:${listUnallowedFiles}\n\nWe also can't accept contributions to files in the content directory with frontmatter \`type: rai\` or \`contentType: rai\`.`
   let workflowFailMessage =
     "It looks like you've modified some files that we can't accept as contributions."
 