1
0
mirror of synced 2025-12-19 18:10:59 -05:00

Refactor ai-tools CLI (#58580)

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Sarah Schneider
2025-12-01 10:58:38 -05:00
committed by GitHub
parent c9a50c6650
commit 64813b673a
4 changed files with 409 additions and 120 deletions

View File

@@ -34,10 +34,30 @@ interface ChatCompletionResponse {
}
}
export async function callModelsApi(promptWithContent: ChatCompletionRequest): Promise<string> {
export async function callModelsApi(
promptWithContent: ChatCompletionRequest,
verbose = false,
): Promise<string> {
let aiResponse: ChatCompletionChoice
// Set default model if none specified
if (!promptWithContent.model) {
promptWithContent.model = 'openai/gpt-4o'
if (verbose) {
console.log('⚠️ No model specified, using default: openai/gpt-4o')
}
}
try {
// Create an AbortController for timeout handling
const controller = new AbortController()
const timeoutId = setTimeout(() => controller.abort(), 180000) // 3 minutes
const startTime = Date.now()
if (verbose) {
console.log(`🚀 Making API request to GitHub Models using ${promptWithContent.model}...`)
}
const response = await fetch(modelsCompletionsEndpoint, {
method: 'post',
body: JSON.stringify(promptWithContent),
@@ -45,16 +65,80 @@ export async function callModelsApi(promptWithContent: ChatCompletionRequest): P
'Content-Type': 'application/json',
Authorization: `Bearer ${process.env.GITHUB_TOKEN}`,
'X-GitHub-Api-Version': '2022-11-28',
Accept: 'Accept: application/vnd.github+json',
Accept: 'application/vnd.github+json',
},
signal: controller.signal,
})
const fetchTime = Date.now() - startTime
if (verbose) {
console.log(`⏱️ API response received in ${fetchTime}ms`)
}
clearTimeout(timeoutId)
if (!response.ok) {
let errorMessage = `HTTP error! status: ${response.status} - ${response.statusText}`
// Try to get more detailed error information
try {
const errorBody = await response.json()
if (errorBody.error && errorBody.error.message) {
errorMessage += ` - ${errorBody.error.message}`
}
} catch {
// If we can't parse error body, continue with basic error
}
// Add helpful hints for common errors
if (response.status === 401) {
errorMessage += ' (Check your GITHUB_TOKEN)'
} else if (response.status === 400) {
errorMessage += ' (This may be due to an invalid model or malformed request)'
} else if (response.status === 429) {
errorMessage += ' (Rate limit exceeded - try again later)'
}
throw new Error(errorMessage)
}
const data: ChatCompletionResponse = await response.json()
if (!data.choices || data.choices.length === 0) {
throw new Error('No response choices returned from API')
}
aiResponse = data.choices[0]
if (verbose) {
const totalTime = Date.now() - startTime
console.log(`✅ Total API call completed in ${totalTime}ms`)
if (data.usage) {
console.log(
`📊 Tokens: ${data.usage.prompt_tokens} prompt + ${data.usage.completion_tokens} completion = ${data.usage.total_tokens} total`,
)
}
}
} catch (error) {
console.error('Error calling GitHub Models REST API')
if (error instanceof Error) {
if (error.name === 'AbortError') {
throw new Error('API call timed out after 3 minutes')
}
console.error('Error calling GitHub Models REST API:', error.message)
}
throw error
}
return aiResponse.message.content
return cleanAIResponse(aiResponse.message.content)
}
/**
 * Normalize the raw text returned by the model.
 *
 * Models frequently wrap their entire answer in a single Markdown code fence
 * (for example ```yaml ... ```). When, and only when, the whole response is
 * wrapped that way, the outer fence is removed. Fences that appear inside the
 * response are left untouched, because the response may itself be a Markdown
 * document whose code blocks must survive being written back to disk.
 *
 * Known ambiguity: a response that starts with one code block and ends with a
 * different one is indistinguishable from a wrapped response and is unwrapped.
 *
 * @param content - Raw message content from the chat completion.
 * @returns The content with surrounding whitespace and one outer fence removed.
 */
function cleanAIResponse(content: string): string {
  const trimmed = content.trim()

  // Opening fence line (optional info string, no backticks), optional body,
  // closing fence as the very last thing in the response. No `m` flag, so `^`
  // and `$` anchor to the whole string rather than to individual lines.
  const wrapped = /^```[^\n`]*\r?\n(?:([\s\S]*?)\r?\n)?```$/.exec(trimmed)
  if (!wrapped) {
    return trimmed
  }

  return (wrapped[1] ?? '').trim()
}

View File

@@ -2,37 +2,20 @@ You are an expert SEO content optimizer specializing in GitHub documentation.
Your task is to analyze a GitHub Docs content file and generate or optimize
the intro frontmatter property following Google's meta description best practices.
## Your mission
## Core Requirements
Generate a single, concise intro (one simple sentence maximum - NO colons, NO detailed explanations) that:
**Primary constraints (must-haves):**
* Start with an action verb ("Learn," "Access," "Explore," "Configure," "Set up," "Build")
* One sentence maximum - NO colons, NO detailed explanations
* Avoid buzzwords: "leverage," "optimize," "maximize," "enhance," "streamline," "empower," "revolutionize," "seamlessly," "comprehensive," "enterprise-grade," "cutting-edge," "innovative," "game-changing," "next-generation," "world-class," "best-in-class," "state-of-the-art," "industry-leading," "robust," "scalable," "mission-critical," "synergistic," "holistic," "strategic," "transformative"
* Take a different approach than the title - don't start with the same words/phrases
* List 2-3 concrete outcomes maximum
* Starts with an action verb (e.g., "Learn," "Discover," "Access," "Explore," "Configure," "Set up," "Build")
* **Uses developer-friendly, direct language** - avoid marketing jargon and corporate buzzwords
* **Prioritizes conciseness over completeness** - cut unnecessary words ruthlessly
* Accurately summarizes the content's core value proposition
* Includes relevant keywords naturally without stuffing
* Follows Google's snippet guidelines (descriptive, informative, compelling)
* Is version-agnostic (no {% ifversion %} blocks, but {% data variables.* %} and {% data reusables.* %} are acceptable)
* Matches the content type (article/category/mapTopic) requirements
* **Goes beyond title restatement** - summarizes the complete article value, not just rephrasing the title
* **Lists concrete steps or outcomes** - what users will actually do or accomplish
* **Limits lists to 2-3 items maximum** - avoid long comma-separated sequences that feel overwhelming
## SEO scoring criteria (1-10 scale)
**10-9 (Excellent)**: Strong action verb, comprehensive content summary, optimal keyword density, clear unique value beyond title, perfect length
**8-7 (Good)**: Action verb present, good content representation, decent keywords, some unique value, appropriate length
**6-5 (Fair)**: Weak action verb or missing, partial content coverage, basic keywords, minimal value beyond title
**4-3 (Poor)**: No action verb, limited content representation, few relevant keywords, mostly restates title
**2-1 (Very Poor)**: Vague or misleading, no clear value proposition, poor keyword usage, completely redundant with title
## Analysis process
1. **Content resolution**: Keep {% data variables.* %} and {% data reusables.* %} but avoid {% ifversion %} blocks
2. **Content analysis**: Identify the article's purpose, target audience, key concepts, and user outcomes
3. **Category detection**: For index pages, analyze child content themes and collective value
4. **SEO optimization**: Use strong action verbs, developer-friendly language, concrete outcomes, and relevant keywords while avoiding corporate buzzwords
**Secondary optimizations (nice-to-haves):**
* Include relevant keywords naturally
* Version-agnostic ({% data variables.* %} OK, avoid {% ifversion %})
* Follow Google snippet guidelines
* Cut unnecessary words ruthlessly
**Content Summarization vs. Title Restatement**:
@@ -47,7 +30,7 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de
- Better: "Use {% data variables.product.prodname_copilot %} chat and code completion to research syntax, practice coding, and master new programming languages faster"
**Use concise, developer-friendly language ({% data variables.* %} OK)**:
- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to successfully deploy {% data variables.copilot.copilot_coding_agent %} in your org"
- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to deploy {% data variables.copilot.copilot_coding_agent %} in your org"
**Avoid overly long lists and colon constructions**:
- Too long: "Scope issues, pick suitable tasks, iterate via PR comments, add repo instructions, enable MCP tools, and preinstall dependencies"
@@ -55,24 +38,13 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de
- Better: "Scope tasks, configure custom instructions, and iterate on pull requests to improve {% data variables.copilot.copilot_coding_agent %} performance"
- Better: "Use {% data variables.product.prodname_copilot %} features like chat and code completion to research syntax, build programs, and learn new programming languages faster"
**Tone Guidelines**:
- **Developer-friendly**: Use direct, practical language
- **Concise over complete**: Cut words ruthlessly
- **Action-oriented**: List what users will actually do
- **Avoid buzzwords**: Skip marketing language and corporate jargon
- **Use concrete verbs**: Instead of "maximize/optimize/enhance" → use "improve," "boost," "increase," or just describe the outcome directly
- **Limit lists**: Maximum 2-3 items in comma-separated lists - prefer flowing sentences over exhaustive enumerations
- **Avoid colon constructions**: Don't use "Do X: detailed explanation of A, B, and C" format - keep it simple and direct
- **Avoid title similarity**: Don't start with the same words/phrases as the article title - approach the topic from a different angle
## Quality Checklist
The intro should answer: "What specific steps will I take?" rather than "What will this comprehensive solution provide?"
## Analysis Process
1. **First Draft**: Generate an initial improved intro following all guidelines above
2. **Title Check**: Compare your draft to the article title - if it starts with similar words, rewrite with a different approach
3. **Self-Review**: Evaluate your draft against the SEO scoring criteria and tone guidelines
4. **Refinement**: If the draft contains buzzwords, weak verbs, title similarity, or scores below 8/10, create a refined version
**Structure**: Action verb + 2-3 concrete outcomes + under 350 characters
**Language**: Direct, practical developer language (no marketing jargon)
**Focus**: What users will DO, not what solution "provides"
**Uniqueness**: Different angle from article title
**Simplicity**: No colons, no complex lists, flowing sentences
## Output format
@@ -84,27 +56,12 @@ Title: "[Article title from frontmatter]"
Original intro: "[Current intro from the article, or "No intro" if none exists]"
Original SEO score: [X]/10
------------------------
Improved intro: "[Single, concise intro that summarizes the article's full content value, not just restating the title]"
Improved SEO score: [X]/10
SEO-friendly alternative: "[Single, concise intro that summarizes the article's full content value, not just restating the title]"
------------------------
```
Note: The improved score should reflect your best attempt after internal refinement.
## Character limits by content type
**Priority: Conciseness over character limits**
- Focus on being as concise as possible while maintaining clarity
- Cut every unnecessary word before considering length
- Developer-friendly brevity trumps hitting character targets
**Technical limits** (for reference):
- **Articles**: Maximum 354 characters
- **Categories**: Maximum 362 characters
- **Map Topics**: Maximum 362 characters
@@ -125,3 +82,17 @@ Note: The improved score should reflect your best attempt after internal refinem
- {% data variables.copilot.copilot_coding_agent %} = "Copilot Coding Agent"
Focus on creating intros that would make sense to someone discovering this content through Google search, clearly communicating the value and relevance of the article.
<!-- IF_WRITE_MODE -->
## WRITE MODE INSTRUCTIONS
**CRITICAL**: You are in write mode. Output ONLY the YAML frontmatter property to update.
- Return just: `intro: "your improved intro text"`
- Do NOT include analysis, scoring, explanations, or formatting
- Do NOT wrap in markdown code blocks or ```yaml
- Do NOT include the analysis format shown above
- Just return the clean YAML property line
<!-- END_WRITE_MODE -->

View File

@@ -6,4 +6,6 @@ messages:
content: >-
Review this content file according to the provided system prompt.
{{input}}
model: openai/gpt-5
model: openai/gpt-4o # Reliable model that works
temperature: 0.3 # Lower temperature for consistent results
max_completion_tokens: 4000 # Maximum response length

View File

@@ -7,6 +7,8 @@ import ora from 'ora'
import { execSync } from 'child_process'
import { callModelsApi } from '@/ai-tools/lib/call-models-api'
import dotenv from 'dotenv'
import readFrontmatter from '@/frame/lib/read-frontmatter'
import { schema } from '@/frame/lib/frontmatter'
dotenv.config({ quiet: true })
const __dirname = path.dirname(fileURLToPath(import.meta.url))
@@ -28,35 +30,92 @@ if (!process.env.GITHUB_TOKEN) {
}
}
interface EditorType {
description: string
// Editor types are not hard-coded: each `<name>.md` file found in the prompts
// directory contributes the editor type `<name>`. If the directory cannot be
// read, a warning is printed and no editor types are reported.
const getAvailableEditorTypes = (): string[] => {
  const promptExtension = '.md'
  try {
    return fs
      .readdirSync(promptDir)
      .filter((fileName) => fileName.endsWith(promptExtension))
      .map((fileName) => path.basename(fileName, promptExtension))
  } catch {
    console.warn('Could not read prompts directory, using empty editor types')
    return []
  }
}
interface EditorTypes {
versioning: EditorType
intro: EditorType
}
const editorTypes = getAvailableEditorTypes()
const editorTypes: EditorTypes = {
versioning: {
description: 'Refine versioning according to simplification guidance.',
},
intro: {
description: 'Refine intro frontmatter based on SEO and content guidelines.',
},
// Returns true when `candidate` is `rootDir` itself or is nested inside it.
// The comparison is made on path segments (via path.relative) instead of raw
// string prefixes, so a sibling such as `/repo-backup` is not mistaken for a
// child of `/repo`.
const isPathInsideRoot = (candidate: string, rootDir: string): boolean => {
  const relative = path.relative(rootDir, candidate)
  if (relative === '') {
    return true
  }
  // path.relative yields an absolute path when no relative route exists
  // (for example a different drive on Windows).
  if (path.isAbsolute(relative)) {
    return false
  }
  return relative !== '..' && !relative.startsWith(`..${path.sep}`)
}

/**
 * Recursively collect Markdown (`.md`) files beneath a directory.
 *
 * Safety checks applied on every level:
 * - paths are resolved with `fs.realpathSync` and anything resolving outside
 *   `rootDir` is ignored;
 * - each resolved directory is visited at most once (guards against loops);
 * - recursion stops once `depth` exceeds `maxDepth`.
 * Unreadable or unresolvable paths are skipped silently rather than aborting
 * the whole walk.
 *
 * Note: the `Dirent` type checks used below do not follow symbolic links, so
 * symlinked entries are neither descended into nor returned.
 *
 * @param dir - Directory to scan.
 * @param rootDir - Boundary directory. It is compared against resolved real
 *   paths, so callers should pass an already-resolved real path.
 * @param depth - Current recursion depth (callers normally omit this).
 * @param maxDepth - Deepest level that will still be scanned.
 * @param visited - Resolved directories already scanned (shared across the recursion).
 * @returns Resolved real paths of every Markdown file found.
 */
const findMarkdownFiles = (
  dir: string,
  rootDir: string,
  depth: number = 0,
  maxDepth: number = 20,
  visited: Set<string> = new Set(),
): string[] => {
  const markdownFiles: string[] = []

  let realDir: string
  try {
    realDir = fs.realpathSync(dir)
  } catch {
    // If we can't resolve real path, skip this directory
    return []
  }

  // Prevent escaping root directory
  if (!isPathInsideRoot(realDir, rootDir)) {
    return []
  }

  // Prevent symlink loops
  if (visited.has(realDir)) {
    return []
  }
  visited.add(realDir)

  // Prevent excessive depth
  if (depth > maxDepth) {
    return []
  }

  let entries: fs.Dirent[]
  try {
    entries = fs.readdirSync(realDir, { withFileTypes: true })
  } catch {
    // If we can't read directory, skip
    return []
  }

  for (const entry of entries) {
    const fullPath = path.join(realDir, entry.name)

    let realFullPath: string
    try {
      realFullPath = fs.realpathSync(fullPath)
    } catch {
      // Broken link or entry removed mid-walk: ignore it
      continue
    }

    // Prevent escaping root directory for files
    if (!isPathInsideRoot(realFullPath, rootDir)) {
      continue
    }

    if (entry.isDirectory()) {
      markdownFiles.push(...findMarkdownFiles(realFullPath, rootDir, depth + 1, maxDepth, visited))
    } else if (entry.isFile() && entry.name.endsWith('.md')) {
      markdownFiles.push(realFullPath)
    }
  }

  return markdownFiles
}
const refinementDescriptions = (): string => {
let str = '\n\n'
for (const [ed, edObj] of Object.entries(editorTypes)) {
str += ` ${ed.padEnd(12)} ${edObj.description}\n`
}
return str
return editorTypes.join(', ')
}
interface CliOptions {
verbose?: boolean
refine: Array<keyof EditorTypes>
prompt?: string[]
refine?: string[]
files: string[]
write?: boolean
}
@@ -71,9 +130,10 @@ program
'-w, --write',
'Write changes back to the original files (default: output to console only)',
)
.requiredOption(
.option('-p, --prompt <type...>', `Specify one or more prompt type: ${refinementDescriptions()}`)
.option(
'-r, --refine <type...>',
`Specify one or more refinement type: ${refinementDescriptions().trimEnd()}\n`,
`(Deprecated: use --prompt) Specify one or more prompt type: ${refinementDescriptions()}`,
)
.requiredOption(
'-f, --files <files...>',
@@ -84,7 +144,30 @@ program
const spinner = ora('Starting AI review...').start()
const files = options.files
const editors = options.refine
// Handle both --prompt and --refine options for backwards compatibility
const prompts = options.prompt || options.refine
if (!prompts || prompts.length === 0) {
spinner.fail('No prompt type specified. Use --prompt or --refine with one or more types.')
process.exitCode = 1
return
}
// Validate that all requested editor types exist
const availableEditors = editorTypes
for (const editor of prompts) {
if (!availableEditors.includes(editor)) {
spinner.fail(
`Unknown prompt type: ${editor}. Available types: ${availableEditors.join(', ')}`,
)
process.exitCode = 1
return
}
}
if (options.verbose) {
console.log(`Processing ${files.length} files with prompts: ${prompts.join(', ')}`)
}
for (const file of files) {
const filePath = path.resolve(process.cwd(), file)
@@ -96,37 +179,101 @@ program
continue
}
try {
spinner.text = `Reading file: ${file}`
const content = fs.readFileSync(filePath, 'utf8')
// Check if it's a directory
const isDirectory = fs.statSync(filePath).isDirectory()
for (const editorType of editors) {
spinner.text = `Running the AI-powered ${editorType} refinement...`
const answer = await callEditor(editorType, content, options.write || false)
for (const editorType of prompts) {
try {
// For other editor types, process individual files
const filesToProcess: string[] = []
if (isDirectory) {
// Find all markdown files in the directory recursively
// Use process.cwd() as the root directory for safety
const rootDir = fs.realpathSync(process.cwd())
filesToProcess.push(...findMarkdownFiles(filePath, rootDir))
if (filesToProcess.length === 0) {
spinner.warn(`No markdown files found in directory: ${file}`)
continue
}
spinner.text = `Found ${filesToProcess.length} markdown files in ${file}`
} else {
filesToProcess.push(filePath)
}
spinner.start()
for (const fileToProcess of filesToProcess) {
const relativePath = path.relative(process.cwd(), fileToProcess)
spinner.text = `Processing: ${relativePath}`
try {
const content = fs.readFileSync(fileToProcess, 'utf8')
const answer = await callEditor(
editorType,
content,
options.write || false,
options.verbose || false,
)
spinner.stop()
if (options.write) {
// Write the result back to the original file
fs.writeFileSync(filePath, answer, 'utf8')
console.log(`✅ Updated: ${file}`)
if (editorType === 'intro') {
// For frontmatter addition/modification, merge properties instead of overwriting entire file
const updatedContent = mergeFrontmatterProperties(fileToProcess, answer)
fs.writeFileSync(fileToProcess, updatedContent, 'utf8')
console.log(`✅ Added frontmatter properties to: ${relativePath}`)
} else {
// For other editor types, write the full result back to the original file
fs.writeFileSync(fileToProcess, answer, 'utf8')
console.log(`✅ Updated: ${relativePath}`)
}
} else {
// Just output to console (current behavior)
if (filesToProcess.length > 1) {
console.log(`\n=== ${relativePath} ===`)
}
console.log(answer)
}
} catch (err) {
const error = err as Error
spinner.fail(`Error processing ${relativePath}: ${error.message}`)
process.exitCode = 1
} finally {
spinner.stop()
}
}
} catch (err) {
const error = err as Error
spinner.fail(`Error processing file ${file}: ${error.message}`)
const targetName = path.relative(process.cwd(), filePath)
spinner.fail(`Error processing ${targetName}: ${error.message}`)
process.exitCode = 1
}
}
}
spinner.stop()
// Exit with appropriate code based on whether any errors occurred
if (process.exitCode) {
process.exit(process.exitCode)
}
})()
})
program.parse(process.argv)
// Handle graceful shutdown.
// Exit with the conventional "128 + signal number" status instead of 0 so that
// shells, CI jobs, and wrapper scripts can tell an interrupted run apart from
// one that finished successfully.
process.on('SIGINT', () => {
  console.log('\n\n🛑 Process interrupted by user')
  process.exit(130) // 128 + SIGINT (2)
})
process.on('SIGTERM', () => {
  console.log('\n\n🛑 Process terminated')
  process.exit(143) // 128 + SIGTERM (15)
})
interface PromptMessage {
content: string
role: string
@@ -139,26 +286,111 @@ interface PromptData {
max_tokens?: number
}
async function callEditor(
editorType: keyof EditorTypes,
content: string,
writeMode: boolean,
): Promise<string> {
const markdownPromptPath = path.join(promptDir, `${editorType}.md`)
let markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8')
// Function to merge new frontmatter properties into existing file while preserving formatting
function mergeFrontmatterProperties(filePath: string, newPropertiesYaml: string): string {
const content = fs.readFileSync(filePath, 'utf8')
const parsed = readFrontmatter(content)
// For intro type in write mode, append special instructions
if (editorType === 'intro' && writeMode) {
markdownPrompt +=
'\n\n**WRITE MODE**: Output only the complete updated file content with the new intro in the frontmatter. Do not include analysis or explanations - just return the file ready to write.'
if (parsed.errors && parsed.errors.length > 0) {
throw new Error(
`Failed to parse frontmatter: ${parsed.errors.map((e) => e.message).join(', ')}`,
)
}
if (!parsed.content) {
throw new Error('Failed to parse content from file')
}
try {
// Clean up the AI response - remove markdown code blocks if present
let cleanedYaml = newPropertiesYaml.trim()
cleanedYaml = cleanedYaml.replace(/^```ya?ml\s*\n/i, '')
cleanedYaml = cleanedYaml.replace(/\n```\s*$/i, '')
cleanedYaml = cleanedYaml.trim()
interface FrontmatterProperties {
intro?: string
[key: string]: unknown
}
const newProperties = yaml.load(cleanedYaml) as FrontmatterProperties
// Security: Validate against prototype pollution using the official frontmatter schema
const allowedKeys = Object.keys(schema.properties)
const sanitizedProperties = Object.fromEntries(
Object.entries(newProperties).filter(([key]) => {
if (allowedKeys.includes(key)) {
return true
}
console.warn(`Filtered out potentially unsafe frontmatter key: ${key}`)
return false
}),
)
// Merge new properties with existing frontmatter
const mergedData: FrontmatterProperties = { ...parsed.data, ...sanitizedProperties }
// Manually ensure intro is wrapped in single quotes in the final output
let result = readFrontmatter.stringify(parsed.content, mergedData)
// Post-process to ensure intro field has single quotes
if (newProperties.intro) {
const introValue = newProperties.intro.toString()
// Replace any quote style on intro with single quotes
result = result.replace(
/^intro:\s*(['"`]?)([^'"`\n\r]+)\1?\s*$/m,
`intro: '${introValue.replace(/'/g, "''")}'`, // Escape single quotes by doubling them
)
}
return result
} catch (error) {
console.error('Failed to parse AI response as YAML:')
console.error('Raw AI response:', JSON.stringify(newPropertiesYaml))
throw new Error(`Failed to parse new frontmatter properties: ${error}`)
}
}
/**
 * Build the chat prompt for one editor type and send it to the Models API.
 *
 * The editor's Markdown prompt (`<editorType>.md` in the prompts directory) and
 * the file content under review are substituted into the YAML prompt template.
 * Write-mode sections in the prompt, delimited by `<!-- IF_WRITE_MODE -->`,
 * `<!-- ELSE_WRITE_MODE -->` and `<!-- END_WRITE_MODE -->`, are kept or removed
 * according to `writeMode`.
 *
 * @param editorType - Name of the prompt file (without `.md`) to use.
 * @param content - Text of the file being reviewed.
 * @param writeMode - Whether the model should return content to write back.
 * @param verbose - Forwarded to `callModelsApi` for progress logging.
 * @returns The model's response text.
 * @throws Error if the prompt file is missing or the template has no messages array.
 */
async function callEditor(
  editorType: string,
  content: string,
  writeMode: boolean,
  verbose = false,
): Promise<string> {
  const markdownPromptPath = path.join(promptDir, `${editorType}.md`)

  if (!fs.existsSync(markdownPromptPath)) {
    throw new Error(`Prompt file not found: ${markdownPromptPath}`)
  }

  const markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8')
  const prompt = yaml.load(fs.readFileSync(promptTemplatePath, 'utf8')) as PromptData

  // Validate the prompt template has required properties
  if (!prompt.messages || !Array.isArray(prompt.messages)) {
    throw new Error('Invalid prompt template: missing or invalid messages array')
  }

  for (const msg of prompt.messages) {
    // Replacer functions are used instead of replacement strings: a string
    // replacement would expand `$&`, `$'`, `$$`, etc. found in the inserted text.
    msg.content = msg.content.replace('{{markdownPrompt}}', () => markdownPrompt)

    // Resolve the write-mode template markers: the branch that does not apply
    // is bracketed with REMOVE_START/REMOVE_END and then deleted.
    msg.content = msg.content.replace(
      /<!-- IF_WRITE_MODE -->/g,
      writeMode ? '' : '<!-- REMOVE_START -->',
    )
    msg.content = msg.content.replace(
      /<!-- ELSE_WRITE_MODE -->/g,
      writeMode ? '<!-- REMOVE_START -->' : '',
    )
    msg.content = msg.content.replace(
      /<!-- END_WRITE_MODE -->/g,
      writeMode ? '' : '<!-- REMOVE_END -->',
    )

    // Remove sections marked for removal
    msg.content = msg.content.replace(/<!-- REMOVE_START -->[\s\S]*?<!-- REMOVE_END -->/g, '')

    // Insert the reviewed content last so that marker-like text inside the
    // user's file is never interpreted as template control syntax.
    msg.content = msg.content.replace('{{input}}', () => content)
  }

  return callModelsApi(prompt, verbose)
}