1
0
mirror of synced 2025-12-19 18:10:59 -05:00

Refactor ai-tools CLI (#58580)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Sarah Schneider
2025-12-01 10:58:38 -05:00
committed by GitHub
parent c9a50c6650
commit 64813b673a
4 changed files with 409 additions and 120 deletions

View File

@@ -34,10 +34,30 @@ interface ChatCompletionResponse {
}
}
export async function callModelsApi(promptWithContent: ChatCompletionRequest): Promise<string> {
export async function callModelsApi(
promptWithContent: ChatCompletionRequest,
verbose = false,
): Promise<string> {
let aiResponse: ChatCompletionChoice
// Set default model if none specified
if (!promptWithContent.model) {
promptWithContent.model = 'openai/gpt-4o'
if (verbose) {
console.log('⚠️ No model specified, using default: openai/gpt-4o')
}
}
try {
// Create an AbortController for timeout handling
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), 180000) // 3 minutes
const startTime = Date.now()
if (verbose) {
console.log(`🚀 Making API request to GitHub Models using ${promptWithContent.model}...`)
}
const response = await fetch(modelsCompletionsEndpoint, {
method: 'post',
body: JSON.stringify(promptWithContent),
@@ -45,16 +65,80 @@ export async function callModelsApi(promptWithContent: ChatCompletionRequest): P
'Content-Type': 'application/json',
Authorization: `Bearer ${process.env.GITHUB_TOKEN}`,
'X-GitHub-Api-Version': '2022-11-28',
Accept: 'Accept: application/vnd.github+json',
Accept: 'application/vnd.github+json',
},
signal: controller.signal,
})
const fetchTime = Date.now() - startTime
if (verbose) {
console.log(`⏱️ API response received in ${fetchTime}ms`)
}
clearTimeout(timeoutId)
if (!response.ok) {
let errorMessage = `HTTP error! status: ${response.status} - ${response.statusText}`
// Try to get more detailed error information
try {
const errorBody = await response.json()
if (errorBody.error && errorBody.error.message) {
errorMessage += ` - ${errorBody.error.message}`
}
} catch {
// If we can't parse error body, continue with basic error
}
// Add helpful hints for common errors
if (response.status === 401) {
errorMessage += ' (Check your GITHUB_TOKEN)'
} else if (response.status === 400) {
errorMessage += ' (This may be due to an invalid model or malformed request)'
} else if (response.status === 429) {
errorMessage += ' (Rate limit exceeded - try again later)'
}
throw new Error(errorMessage)
}
const data: ChatCompletionResponse = await response.json()
if (!data.choices || data.choices.length === 0) {
throw new Error('No response choices returned from API')
}
aiResponse = data.choices[0]
if (verbose) {
const totalTime = Date.now() - startTime
console.log(`✅ Total API call completed in ${totalTime}ms`)
if (data.usage) {
console.log(
`📊 Tokens: ${data.usage.prompt_tokens} prompt + ${data.usage.completion_tokens} completion = ${data.usage.total_tokens} total`,
)
}
}
} catch (error) {
console.error('Error calling GitHub Models REST API')
if (error instanceof Error) {
if (error.name === 'AbortError') {
throw new Error('API call timed out after 3 minutes')
}
console.error('Error calling GitHub Models REST API:', error.message)
}
throw error
}
return aiResponse.message.content
return cleanAIResponse(aiResponse.message.content)
}
/**
 * Strip Markdown code-fence lines from a model response so only the payload remains.
 *
 * Fence lines are recognised with either LF or CRLF line endings, with optional
 * trailing spaces/tabs, and with info strings that contain the usual language-tag
 * punctuation (for example `c++`, `shell-session`, `objective-c`).
 *
 * @param content - Raw text returned by the model.
 * @returns The text with opening/closing fence lines removed and outer whitespace trimmed.
 */
function cleanAIResponse(content: string): string {
  return (
    content
      // Opening fence: ``` plus an optional language tag, up to and including its line break.
      .replace(/^```[\w+#.-]*[ \t]*\r?\n/gm, '')
      // Closing fence at the end of a line; the line break before it is removed with it.
      .replace(/\r?\n```[ \t]*$/gm, '')
      // Standalone closing fence between two lines collapses to a single newline.
      .replace(/\r?\n```[ \t]*\r?\n/g, '\n')
      .trim()
  )
}

View File

@@ -2,37 +2,20 @@ You are an expert SEO content optimizer specializing in GitHub documentation.
Your task is to analyze a GitHub Docs content file and generate or optimize
the intro frontmatter property following Google's meta description best practices.
## Your mission
Generate a single, concise intro (one simple sentence maximum - NO colons, NO detailed explanations) that:
## Core Requirements
* Starts with an action verb (e.g., "Learn," "Discover," "Access," "Explore," "Configure," "Set up," "Build")
* **Uses developer-friendly, direct language** - avoid marketing jargon and corporate buzzwords
* **Prioritizes conciseness over completeness** - cut unnecessary words ruthlessly
* Accurately summarizes the content's core value proposition
* Includes relevant keywords naturally without stuffing
* Follows Google's snippet guidelines (descriptive, informative, compelling)
* Is version-agnostic (no {% ifversion %} blocks, but {% data variables.* %} and {% data reusables.* %} are acceptable)
* Matches the content type (article/category/mapTopic) requirements
* **Goes beyond title restatement** - summarizes the complete article value, not just rephrasing the title
* **Lists concrete steps or outcomes** - what users will actually do or accomplish
* **Limits lists to 2-3 items maximum** - avoid long comma-separated sequences that feel overwhelming
**Primary constraints (must-haves):**
* Start with action verb ("Learn," "Access," "Explore," "Configure," "Set up," "Build")
* One sentence maximum - NO colons, NO detailed explanations
* Avoid buzzwords: "leverage," "optimize," "maximize," "enhance," "streamline," "empower," "revolutionize," "seamlessly," "comprehensive," "enterprise-grade," "cutting-edge," "innovative," "game-changing," "next-generation," "world-class," "best-in-class," "state-of-the-art," "industry-leading," "robust," "scalable," "mission-critical," "synergistic," "holistic," "strategic," "transformative"
* Different approach than title - don't start with same words/phrases
* Lists 2-3 concrete outcomes maximum
## SEO scoring criteria (1-10 scale)
**10-9 (Excellent)**: Strong action verb, comprehensive content summary, optimal keyword density, clear unique value beyond title, perfect length
**8-7 (Good)**: Action verb present, good content representation, decent keywords, some unique value, appropriate length
**6-5 (Fair)**: Weak action verb or missing, partial content coverage, basic keywords, minimal value beyond title
**4-3 (Poor)**: No action verb, limited content representation, few relevant keywords, mostly restates title
**2-1 (Very Poor)**: Vague or misleading, no clear value proposition, poor keyword usage, completely redundant with title
## Analysis process
1. **Content resolution**: Keep {% data variables.* %} and {% data reusables.* %} but avoid {% ifversion %} blocks
2. **Content analysis**: Identify the article's purpose, target audience, key concepts, and user outcomes
3. **Category detection**: For index pages, analyze child content themes and collective value
4. **SEO optimization**: Use strong action verbs, developer-friendly language, concrete outcomes, and relevant keywords while avoiding corporate buzzwords
**Secondary optimizations (nice-to-haves):**
* Include relevant keywords naturally
* Version-agnostic ({% data variables.* %} OK, avoid {% ifversion %})
* Follow Google snippet guidelines
* Cut unnecessary words ruthlessly
**Content Summarization vs. Title Restatement**:
@@ -47,7 +30,7 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de
- Better: "Use {% data variables.product.prodname_copilot %} chat and code completion to research syntax, practice coding, and master new programming languages faster"
**Use concise, developer-friendly language ({% data variables.* %} OK)**:
- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to successfully deploy {% data variables.copilot.copilot_coding_agent %} in your org"
- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to deploy {% data variables.copilot.copilot_coding_agent %} in your org"
**Avoid overly long lists and colon constructions**:
- Too long: "Scope issues, pick suitable tasks, iterate via PR comments, add repo instructions, enable MCP tools, and preinstall dependencies"
@@ -55,24 +38,13 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de
- Better: "Scope tasks, configure custom instructions, and iterate on pull requests to improve {% data variables.copilot.copilot_coding_agent %} performance"
- Better: "Use {% data variables.product.prodname_copilot %} features like chat and code completion to research syntax, build programs, and learn new programming languages faster"
**Tone Guidelines**:
- **Developer-friendly**: Use direct, practical language
- **Concise over complete**: Cut words ruthlessly
- **Action-oriented**: List what users will actually do
- **Avoid buzzwords**: Skip marketing language and corporate jargon
- **Use concrete verbs**: Instead of "maximize/optimize/enhance" → use "improve," "boost," "increase," or just describe the outcome directly
- **Limit lists**: Maximum 2-3 items in comma-separated lists - prefer flowing sentences over exhaustive enumerations
- **Avoid colon constructions**: Don't use "Do X: detailed explanation of A, B, and C" format - keep it simple and direct
- **Avoid title similarity**: Don't start with the same words/phrases as the article title - approach the topic from a different angle
## Quality Checklist
The intro should answer: "What specific steps will I take?" rather than "What will this comprehensive solution provide?"
## Analysis Process
1. **First Draft**: Generate an initial improved intro following all guidelines above
2. **Title Check**: Compare your draft to the article title - if it starts with similar words, rewrite with a different approach
3. **Self-Review**: Evaluate your draft against the SEO scoring criteria and tone guidelines
4. **Refinement**: If the draft contains buzzwords, weak verbs, title similarity, or scores below 8/10, create a refined version
**Structure**: Action verb + 2-3 concrete outcomes + under 350 characters
**Language**: Direct, practical developer language (no marketing jargon)
**Focus**: What users will DO, not what solution "provides"
**Uniqueness**: Different angle from article title
**Simplicity**: No colons, no complex lists, flowing sentences
## Output format
@@ -84,27 +56,12 @@ Title: "[Article title from frontmatter]"
Original intro: "[Current intro from the article, or "No intro" if none exists]"
Original SEO score: [X]/10
------------------------
Improved intro: "[Single, concise intro that summarizes the article's full content value, not just restating the title]"
Improved SEO score: [X]/10
SEO-friendly alternative: "[Single, concise intro that summarizes the article's full content value, not just restating the title]"
------------------------
```
Note: The improved score should reflect your best attempt after internal refinement.
## Character limits by content type
**Priority: Conciseness over character limits**
- Focus on being as concise as possible while maintaining clarity
- Cut every unnecessary word before considering length
- Developer-friendly brevity trumps hitting character targets
**Technical limits** (for reference):
- **Articles**: Maximum 354 characters
- **Categories**: Maximum 362 characters
- **Map Topics**: Maximum 362 characters
@@ -124,4 +81,18 @@ Note: The improved score should reflect your best attempt after internal refinem
- {% data variables.product.prodname_copilot %} = "GitHub Copilot"
- {% data variables.copilot.copilot_coding_agent %} = "Copilot Coding Agent"
Focus on creating intros that would make sense to someone discovering this content through Google search, clearly communicating the value and relevance of the article.
Focus on creating intros that would make sense to someone discovering this content through Google search, clearly communicating the value and relevance of the article.
<!-- IF_WRITE_MODE -->
## WRITE MODE INSTRUCTIONS
**CRITICAL**: You are in write mode. Output ONLY the YAML frontmatter property to update.
- Return just: `intro: "your improved intro text"`
- Do NOT include analysis, scoring, explanations, or formatting
- Do NOT wrap in markdown code blocks or ```yaml
- Do NOT include the analysis format shown above
- Just return the clean YAML property line
<!-- END_WRITE_MODE -->

View File

@@ -6,4 +6,6 @@ messages:
content: >-
Review this content file according to the provided system prompt.
{{input}}
model: openai/gpt-5
model: openai/gpt-4o # Reliable model that works
temperature: 0.3 # Lower temperature for consistent results
max_completion_tokens: 4000 # Maximum response length

View File

@@ -7,6 +7,8 @@ import ora from 'ora'
import { execSync } from 'child_process'
import { callModelsApi } from '@/ai-tools/lib/call-models-api'
import dotenv from 'dotenv'
import readFrontmatter from '@/frame/lib/read-frontmatter'
import { schema } from '@/frame/lib/frontmatter'
dotenv.config({ quiet: true })
const __dirname = path.dirname(fileURLToPath(import.meta.url))
@@ -28,35 +30,92 @@ if (!process.env.GITHUB_TOKEN) {
}
}
interface EditorType {
description: string
/**
 * List the editor (prompt) types that are currently available.
 *
 * An editor type is the base name of each `.md` file found directly inside
 * `promptDir`. If the directory cannot be read, a warning is printed and an
 * empty list is returned.
 */
const getAvailableEditorTypes = (): string[] => {
  let promptFileNames: string[]
  try {
    promptFileNames = fs.readdirSync(promptDir)
  } catch {
    console.warn('Could not read prompts directory, using empty editor types')
    return []
  }

  return promptFileNames
    .filter((fileName) => fileName.endsWith('.md'))
    .map((fileName) => path.basename(fileName, '.md'))
}
interface EditorTypes {
versioning: EditorType
intro: EditorType
}
const editorTypes = getAvailableEditorTypes()
const editorTypes: EditorTypes = {
versioning: {
description: 'Refine versioning according to simplification guidance.',
},
intro: {
description: 'Refine intro frontmatter based on SEO and content guidelines.',
},
/**
 * Recursively collect Markdown (`.md`) files beneath `dir`.
 *
 * Safety checks applied while walking:
 * - every directory and entry is resolved with `fs.realpathSync` and must be
 *   `rootDir` itself or lie beneath it;
 * - resolved directories are recorded in `visited` so a directory is walked once;
 * - recursion stops once `depth` exceeds `maxDepth`;
 * - directories or entries that cannot be resolved or read are skipped silently.
 *
 * @param dir - Directory to scan.
 * @param rootDir - Boundary directory; callers should pass an already-resolved real path,
 *   since it is compared against resolved paths.
 * @param depth - Current recursion depth (callers normally omit this).
 * @param maxDepth - Deepest level that will still be scanned.
 * @param visited - Real paths of directories already walked (shared across recursion).
 * @returns Real paths of the Markdown files that were found.
 */
const findMarkdownFiles = (
  dir: string,
  rootDir: string,
  depth: number = 0,
  maxDepth: number = 20,
  visited: Set<string> = new Set(),
): string[] => {
  // Containment is decided on path segments, not on a raw string prefix: with
  // `startsWith`, a sibling such as `/repo/docs-evil` would count as inside `/repo/docs`.
  const isInsideRoot = (candidate: string): boolean => {
    const relative = path.relative(rootDir, candidate)
    if (relative === '') return true
    const climbsOut = relative === '..' || relative.startsWith(`..${path.sep}`)
    // An absolute result means there is no relative route (e.g. another drive on Windows).
    return !climbsOut && !path.isAbsolute(relative)
  }

  let realDir: string
  try {
    realDir = fs.realpathSync(dir)
  } catch {
    // If we can't resolve real path, skip this directory
    return []
  }

  // Prevent escaping root directory
  if (!isInsideRoot(realDir)) {
    return []
  }

  // Prevent symlink loops
  if (visited.has(realDir)) {
    return []
  }
  visited.add(realDir)

  // Prevent excessive depth
  if (depth > maxDepth) {
    return []
  }

  let entries: fs.Dirent[]
  try {
    entries = fs.readdirSync(realDir, { withFileTypes: true })
  } catch {
    // If we can't read directory, skip
    return []
  }

  const markdownFiles: string[] = []
  for (const entry of entries) {
    const fullPath = path.join(realDir, entry.name)
    let realFullPath: string
    try {
      realFullPath = fs.realpathSync(fullPath)
    } catch {
      continue
    }

    // Prevent escaping root directory for files
    if (!isInsideRoot(realFullPath)) {
      continue
    }

    if (entry.isDirectory()) {
      markdownFiles.push(...findMarkdownFiles(realFullPath, rootDir, depth + 1, maxDepth, visited))
    } else if (entry.isFile() && entry.name.endsWith('.md')) {
      markdownFiles.push(realFullPath)
    }
  }
  return markdownFiles
}
const refinementDescriptions = (): string => {
let str = '\n\n'
for (const [ed, edObj] of Object.entries(editorTypes)) {
str += ` ${ed.padEnd(12)} ${edObj.description}\n`
}
return str
return editorTypes.join(', ')
}
interface CliOptions {
verbose?: boolean
refine: Array<keyof EditorTypes>
prompt?: string[]
refine?: string[]
files: string[]
write?: boolean
}
@@ -71,9 +130,10 @@ program
'-w, --write',
'Write changes back to the original files (default: output to console only)',
)
.requiredOption(
.option('-p, --prompt <type...>', `Specify one or more prompt type: ${refinementDescriptions()}`)
.option(
'-r, --refine <type...>',
`Specify one or more refinement type: ${refinementDescriptions().trimEnd()}\n`,
`(Deprecated: use --prompt) Specify one or more prompt type: ${refinementDescriptions()}`,
)
.requiredOption(
'-f, --files <files...>',
@@ -84,7 +144,30 @@ program
const spinner = ora('Starting AI review...').start()
const files = options.files
const editors = options.refine
// Handle both --prompt and --refine options for backwards compatibility
const prompts = options.prompt || options.refine
if (!prompts || prompts.length === 0) {
spinner.fail('No prompt type specified. Use --prompt or --refine with one or more types.')
process.exitCode = 1
return
}
// Validate that all requested editor types exist
const availableEditors = editorTypes
for (const editor of prompts) {
if (!availableEditors.includes(editor)) {
spinner.fail(
`Unknown prompt type: ${editor}. Available types: ${availableEditors.join(', ')}`,
)
process.exitCode = 1
return
}
}
if (options.verbose) {
console.log(`Processing ${files.length} files with prompts: ${prompts.join(', ')}`)
}
for (const file of files) {
const filePath = path.resolve(process.cwd(), file)
@@ -96,37 +179,101 @@ program
continue
}
try {
spinner.text = `Reading file: ${file}`
const content = fs.readFileSync(filePath, 'utf8')
// Check if it's a directory
const isDirectory = fs.statSync(filePath).isDirectory()
for (const editorType of editors) {
spinner.text = `Running the AI-powered ${editorType} refinement...`
const answer = await callEditor(editorType, content, options.write || false)
spinner.stop()
for (const editorType of prompts) {
try {
// For other editor types, process individual files
const filesToProcess: string[] = []
if (options.write) {
// Write the result back to the original file
fs.writeFileSync(filePath, answer, 'utf8')
console.log(`✅ Updated: ${file}`)
if (isDirectory) {
// Find all markdown files in the directory recursively
// Use process.cwd() as the root directory for safety
const rootDir = fs.realpathSync(process.cwd())
filesToProcess.push(...findMarkdownFiles(filePath, rootDir))
if (filesToProcess.length === 0) {
spinner.warn(`No markdown files found in directory: ${file}`)
continue
}
spinner.text = `Found ${filesToProcess.length} markdown files in ${file}`
} else {
// Just output to console (current behavior)
console.log(answer)
filesToProcess.push(filePath)
}
spinner.start()
for (const fileToProcess of filesToProcess) {
const relativePath = path.relative(process.cwd(), fileToProcess)
spinner.text = `Processing: ${relativePath}`
try {
const content = fs.readFileSync(fileToProcess, 'utf8')
const answer = await callEditor(
editorType,
content,
options.write || false,
options.verbose || false,
)
spinner.stop()
if (options.write) {
if (editorType === 'intro') {
// For frontmatter addition/modification, merge properties instead of overwriting entire file
const updatedContent = mergeFrontmatterProperties(fileToProcess, answer)
fs.writeFileSync(fileToProcess, updatedContent, 'utf8')
console.log(`✅ Added frontmatter properties to: ${relativePath}`)
} else {
// For other editor types, write the full result back to the original file
fs.writeFileSync(fileToProcess, answer, 'utf8')
console.log(`✅ Updated: ${relativePath}`)
}
} else {
// Just output to console (current behavior)
if (filesToProcess.length > 1) {
console.log(`\n=== ${relativePath} ===`)
}
console.log(answer)
}
} catch (err) {
const error = err as Error
spinner.fail(`Error processing ${relativePath}: ${error.message}`)
process.exitCode = 1
} finally {
spinner.stop()
}
}
} catch (err) {
const error = err as Error
const targetName = path.relative(process.cwd(), filePath)
spinner.fail(`Error processing ${targetName}: ${error.message}`)
process.exitCode = 1
}
} catch (err) {
const error = err as Error
spinner.fail(`Error processing file ${file}: ${error.message}`)
process.exitCode = 1
}
}
spinner.stop()
// Exit with appropriate code based on whether any errors occurred
if (process.exitCode) {
process.exit(process.exitCode)
}
})()
})
program.parse(process.argv)
// Handle graceful shutdown.
// Exit with the conventional "128 + signal number" status (SIGINT = 2, SIGTERM = 15)
// rather than 0, so shells and CI do not treat an aborted run as a success.
process.on('SIGINT', () => {
  console.log('\n\n🛑 Process interrupted by user')
  process.exit(130)
})
process.on('SIGTERM', () => {
  console.log('\n\n🛑 Process terminated')
  process.exit(143)
})
interface PromptMessage {
content: string
role: string
@@ -139,26 +286,111 @@ interface PromptData {
max_tokens?: number
}
async function callEditor(
editorType: keyof EditorTypes,
content: string,
writeMode: boolean,
): Promise<string> {
const markdownPromptPath = path.join(promptDir, `${editorType}.md`)
let markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8')
// Function to merge new frontmatter properties into existing file while preserving formatting
function mergeFrontmatterProperties(filePath: string, newPropertiesYaml: string): string {
const content = fs.readFileSync(filePath, 'utf8')
const parsed = readFrontmatter(content)
// For intro type in write mode, append special instructions
if (editorType === 'intro' && writeMode) {
markdownPrompt +=
'\n\n**WRITE MODE**: Output only the complete updated file content with the new intro in the frontmatter. Do not include analysis or explanations - just return the file ready to write.'
if (parsed.errors && parsed.errors.length > 0) {
throw new Error(
`Failed to parse frontmatter: ${parsed.errors.map((e) => e.message).join(', ')}`,
)
}
if (!parsed.content) {
throw new Error('Failed to parse content from file')
}
try {
// Clean up the AI response - remove markdown code blocks if present
let cleanedYaml = newPropertiesYaml.trim()
cleanedYaml = cleanedYaml.replace(/^```ya?ml\s*\n/i, '')
cleanedYaml = cleanedYaml.replace(/\n```\s*$/i, '')
cleanedYaml = cleanedYaml.trim()
interface FrontmatterProperties {
intro?: string
[key: string]: unknown
}
const newProperties = yaml.load(cleanedYaml) as FrontmatterProperties
// Security: Validate against prototype pollution using the official frontmatter schema
const allowedKeys = Object.keys(schema.properties)
const sanitizedProperties = Object.fromEntries(
Object.entries(newProperties).filter(([key]) => {
if (allowedKeys.includes(key)) {
return true
}
console.warn(`Filtered out potentially unsafe frontmatter key: ${key}`)
return false
}),
)
// Merge new properties with existing frontmatter
const mergedData: FrontmatterProperties = { ...parsed.data, ...sanitizedProperties }
// Manually ensure intro is wrapped in single quotes in the final output
let result = readFrontmatter.stringify(parsed.content, mergedData)
// Post-process to ensure intro field has single quotes
if (newProperties.intro) {
const introValue = newProperties.intro.toString()
// Replace any quote style on intro with single quotes
result = result.replace(
/^intro:\s*(['"`]?)([^'"`\n\r]+)\1?\s*$/m,
`intro: '${introValue.replace(/'/g, "''")}'`, // Escape single quotes by doubling them
)
}
return result
} catch (error) {
console.error('Failed to parse AI response as YAML:')
console.error('Raw AI response:', JSON.stringify(newPropertiesYaml))
throw new Error(`Failed to parse new frontmatter properties: ${error}`)
}
}
async function callEditor(
editorType: string,
content: string,
writeMode: boolean,
verbose = false,
): Promise<string> {
const markdownPromptPath = path.join(promptDir, `${String(editorType)}.md`)
if (!fs.existsSync(markdownPromptPath)) {
throw new Error(`Prompt file not found: ${markdownPromptPath}`)
}
const markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8')
const prompt = yaml.load(fs.readFileSync(promptTemplatePath, 'utf8')) as PromptData
// Validate the prompt template has required properties
if (!prompt.messages || !Array.isArray(prompt.messages)) {
throw new Error('Invalid prompt template: missing or invalid messages array')
}
for (const msg of prompt.messages) {
msg.content = msg.content.replace('{{markdownPrompt}}', markdownPrompt)
msg.content = msg.content.replace('{{input}}', content)
// Replace writeMode template variable with simple string replacement
msg.content = msg.content.replace(
/<!-- IF_WRITE_MODE -->/g,
writeMode ? '' : '<!-- REMOVE_START -->',
)
msg.content = msg.content.replace(
/<!-- ELSE_WRITE_MODE -->/g,
writeMode ? '<!-- REMOVE_START -->' : '',
)
msg.content = msg.content.replace(
/<!-- END_WRITE_MODE -->/g,
writeMode ? '' : '<!-- REMOVE_END -->',
)
// Remove sections marked for removal
msg.content = msg.content.replace(/<!-- REMOVE_START -->[\s\S]*?<!-- REMOVE_END -->/g, '')
}
return callModelsApi(prompt)
return callModelsApi(prompt, verbose)
}