diff --git a/src/ai-tools/lib/call-models-api.ts b/src/ai-tools/lib/call-models-api.ts index dda311baa0..e08638e3c3 100644 --- a/src/ai-tools/lib/call-models-api.ts +++ b/src/ai-tools/lib/call-models-api.ts @@ -34,10 +34,30 @@ interface ChatCompletionResponse { } } -export async function callModelsApi(promptWithContent: ChatCompletionRequest): Promise { +export async function callModelsApi( + promptWithContent: ChatCompletionRequest, + verbose = false, +): Promise { let aiResponse: ChatCompletionChoice + // Set default model if none specified + if (!promptWithContent.model) { + promptWithContent.model = 'openai/gpt-4o' + if (verbose) { + console.log('āš ļø No model specified, using default: openai/gpt-4o') + } + } + try { + // Create an AbortController for timeout handling + const controller = new AbortController() + const timeoutId = setTimeout(() => controller.abort(), 180000) // 3 minutes + + const startTime = Date.now() + if (verbose) { + console.log(`šŸš€ Making API request to GitHub Models using ${promptWithContent.model}...`) + } + const response = await fetch(modelsCompletionsEndpoint, { method: 'post', body: JSON.stringify(promptWithContent), @@ -45,16 +65,80 @@ export async function callModelsApi(promptWithContent: ChatCompletionRequest): P 'Content-Type': 'application/json', Authorization: `Bearer ${process.env.GITHUB_TOKEN}`, 'X-GitHub-Api-Version': '2022-11-28', - Accept: 'Accept: application/vnd.github+json', + Accept: 'application/vnd.github+json', }, + signal: controller.signal, }) + const fetchTime = Date.now() - startTime + if (verbose) { + console.log(`ā±ļø API response received in ${fetchTime}ms`) + } + + clearTimeout(timeoutId) + + if (!response.ok) { + let errorMessage = `HTTP error! 
status: ${response.status} - ${response.statusText}` + + // Try to get more detailed error information + try { + const errorBody = await response.json() + if (errorBody.error && errorBody.error.message) { + errorMessage += ` - ${errorBody.error.message}` + } + } catch { + // If we can't parse error body, continue with basic error + } + + // Add helpful hints for common errors + if (response.status === 401) { + errorMessage += ' (Check your GITHUB_TOKEN)' + } else if (response.status === 400) { + errorMessage += ' (This may be due to an invalid model or malformed request)' + } else if (response.status === 429) { + errorMessage += ' (Rate limit exceeded - try again later)' + } + + throw new Error(errorMessage) + } + const data: ChatCompletionResponse = await response.json() + + if (!data.choices || data.choices.length === 0) { + throw new Error('No response choices returned from API') + } + aiResponse = data.choices[0] + + if (verbose) { + const totalTime = Date.now() - startTime + console.log(`āœ… Total API call completed in ${totalTime}ms`) + + if (data.usage) { + console.log( + `šŸ“Š Tokens: ${data.usage.prompt_tokens} prompt + ${data.usage.completion_tokens} completion = ${data.usage.total_tokens} total`, + ) + } + } } catch (error) { - console.error('Error calling GitHub Models REST API') + if (error instanceof Error) { + if (error.name === 'AbortError') { + throw new Error('API call timed out after 3 minutes') + } + console.error('Error calling GitHub Models REST API:', error.message) + } throw error } - return aiResponse.message.content + return cleanAIResponse(aiResponse.message.content) +} + +// Helper function to clean up AI response content +function cleanAIResponse(content: string): string { + // Remove markdown code blocks + return content + .replace(/^```[\w]*\n/gm, '') // Remove opening code blocks + .replace(/\n```$/gm, '') // Remove closing code blocks at end + .replace(/\n```\n/gm, '\n') // Remove standalone closing code blocks + .trim() } diff 
--git a/src/ai-tools/prompts/intro.md b/src/ai-tools/prompts/intro.md index ddf4eeccbb..5a0b6be7e9 100644 --- a/src/ai-tools/prompts/intro.md +++ b/src/ai-tools/prompts/intro.md @@ -2,37 +2,20 @@ You are an expert SEO content optimizer specializing in GitHub documentation. Your task is to analyze a GitHub Docs content file and generate or optimize the intro frontmatter property following Google's meta description best practices. -## Your mission - -Generate a single, concise intro (one simple sentence maximum - NO colons, NO detailed explanations) that: +## Core Requirements -* Starts with an action verb (e.g., "Learn," "Discover," "Access," "Explore," "Configure," "Set up," "Build") -* **Uses developer-friendly, direct language** - avoid marketing jargon and corporate buzzwords -* **Prioritizes conciseness over completeness** - cut unnecessary words ruthlessly -* Accurately summarizes the content's core value proposition -* Includes relevant keywords naturally without stuffing -* Follows Google's snippet guidelines (descriptive, informative, compelling) -* Is version-agnostic (no {% ifversion %} blocks, but {% data variables.* %} and {% data reusables.* %} are acceptable) -* Matches the content type (article/category/mapTopic) requirements -* **Goes beyond title restatement** - summarizes the complete article value, not just rephrasing the title -* **Lists concrete steps or outcomes** - what users will actually do or accomplish -* **Limits lists to 2-3 items maximum** - avoid long comma-separated sequences that feel overwhelming +**Primary constraints (must-haves):** +* Start with action verb ("Learn," "Access," "Explore," "Configure," "Set up," "Build") +* One sentence maximum - NO colons, NO detailed explanations +* Avoid buzzwords: "leverage," "optimize," "maximize," "enhance," "streamline," "empower," "revolutionize," "seamlessly," "comprehensive," "enterprise-grade," "cutting-edge," "innovative," "game-changing," "next-generation," "world-class," 
"best-in-class," "state-of-the-art," "industry-leading," "robust," "scalable," "mission-critical," "synergistic," "holistic," "strategic," "transformative" +* Different approach than title - don't start with same words/phrases +* Lists 2-3 concrete outcomes maximum -## SEO scoring criteria (1-10 scale) - -**10-9 (Excellent)**: Strong action verb, comprehensive content summary, optimal keyword density, clear unique value beyond title, perfect length -**8-7 (Good)**: Action verb present, good content representation, decent keywords, some unique value, appropriate length -**6-5 (Fair)**: Weak action verb or missing, partial content coverage, basic keywords, minimal value beyond title -**4-3 (Poor)**: No action verb, limited content representation, few relevant keywords, mostly restates title -**2-1 (Very Poor)**: Vague or misleading, no clear value proposition, poor keyword usage, completely redundant with title - -## Analysis process - -1. **Content resolution**: Keep {% data variables.* %} and {% data reusables.* %} but avoid {% ifversion %} blocks -2. **Content analysis**: Identify the article's purpose, target audience, key concepts, and user outcomes -3. **Category detection**: For index pages, analyze child content themes and collective value - -4. **SEO optimization**: Use strong action verbs, developer-friendly language, concrete outcomes, and relevant keywords while avoiding corporate buzzwords +**Secondary optimizations (nice-to-haves):** +* Include relevant keywords naturally +* Version-agnostic ({% data variables.* %} OK, avoid {% ifversion %}) +* Follow Google snippet guidelines +* Cut unnecessary words ruthlessly **Content Summarization vs. 
Title Restatement**: @@ -47,7 +30,7 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de - Better: "Use {% data variables.product.prodname_copilot %} chat and code completion to research syntax, practice coding, and master new programming languages faster" ✅ **Use concise, developer-friendly language ({% data variables.* %} OK)**: -- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to successfully deploy {% data variables.copilot.copilot_coding_agent %} in your org" +- Better intro: "Evaluate use cases, configure security settings, and run pilot trials to deploy {% data variables.copilot.copilot_coding_agent %} in your org" ❌ **Avoid overly long lists and colon constructions**: - Too long: "Scope issues, pick suitable tasks, iterate via PR comments, add repo instructions, enable MCP tools, and preinstall dependencies" @@ -55,24 +38,13 @@ Generate a single, concise intro (one simple sentence maximum - NO colons, NO de - Better: "Scope tasks, configure custom instructions, and iterate on pull requests to improve {% data variables.copilot.copilot_coding_agent %} performance" - Better: "Use {% data variables.product.prodname_copilot %} features like chat and code completion to research syntax, build programs, and learn new programming languages faster" -**Tone Guidelines**: -- **Developer-friendly**: Use direct, practical language -- **Concise over complete**: Cut words ruthlessly -- **Action-oriented**: List what users will actually do -- **Avoid buzzwords**: Skip marketing language and corporate jargon -- **Use concrete verbs**: Instead of "maximize/optimize/enhance" → use "improve," "boost," "increase," or just describe the outcome directly -- **Limit lists**: Maximum 2-3 items in comma-separated lists - prefer flowing sentences over exhaustive enumerations -- **Avoid colon constructions**: Don't use "Do X: detailed explanation of A, B, and C" format - keep it simple and direct -- **Avoid title 
similarity**: Don't start with the same words/phrases as the article title - approach the topic from a different angle +## Quality Checklist -The intro should answer: "What specific steps will I take?" rather than "What will this comprehensive solution provide?" - -## Analysis Process - -1. **First Draft**: Generate an initial improved intro following all guidelines above -2. **Title Check**: Compare your draft to the article title - if it starts with similar words, rewrite with a different approach -3. **Self-Review**: Evaluate your draft against the SEO scoring criteria and tone guidelines -4. **Refinement**: If the draft contains buzzwords, weak verbs, title similarity, or scores below 8/10, create a refined version +✅ **Structure**: Action verb + 2-3 concrete outcomes + under 350 characters +✅ **Language**: Direct, practical developer language (no marketing jargon) +✅ **Focus**: What users will DO, not what solution "provides" +✅ **Uniqueness**: Different angle from article title +✅ **Simplicity**: No colons, no complex lists, flowing sentences ## Output format @@ -84,27 +56,12 @@ Title: "[Article title from frontmatter]" Original intro: "[Current intro from the article, or "No intro" if none exists]" - -Original SEO score: [X]/10 ------------------------- - -Improved intro: "[Single, concise intro that summarizes the article's full content value, not just restating the title]" - - -Improved SEO score: [X]/10 +SEO-friendly alternative: "[Single, concise intro that summarizes the article's full content value, not just restating the title]" ------------------------ ``` -Note: The improved score should reflect your best attempt after internal refinement. 
- ## Character limits by content type -**Priority: Conciseness over character limits** -- Focus on being as concise as possible while maintaining clarity -- Cut every unnecessary word before considering length -- Developer-friendly brevity trumps hitting character targets - -**Technical limits** (for reference): - **Articles**: Maximum 354 characters - **Categories**: Maximum 362 characters - **Map Topics**: Maximum 362 characters @@ -124,4 +81,18 @@ Note: The improved score should reflect your best attempt after internal refinem - {% data variables.product.prodname_copilot %} = "GitHub Copilot" - {% data variables.copilot.copilot_coding_agent %} = "Copilot Coding Agent" -Focus on creating intros that would make sense to someone discovering this content through Google search, clearly communicating the value and relevance of the article. \ No newline at end of file +Focus on creating intros that would make sense to someone discovering this content through Google search, clearly communicating the value and relevance of the article. + + + +## WRITE MODE INSTRUCTIONS + +**CRITICAL**: You are in write mode. Output ONLY the YAML frontmatter property to update. + +- Return just: `intro: "your improved intro text"` +- Do NOT include analysis, scoring, explanations, or formatting +- Do NOT wrap in markdown code blocks or ```yaml +- Do NOT include the analysis format shown above +- Just return the clean YAML property line + + \ No newline at end of file diff --git a/src/ai-tools/prompts/prompt-template.yml b/src/ai-tools/prompts/prompt-template.yml index dab8d13adf..293a3decfe 100644 --- a/src/ai-tools/prompts/prompt-template.yml +++ b/src/ai-tools/prompts/prompt-template.yml @@ -6,4 +6,6 @@ messages: content: >- Review this content file according to the provided system prompt. 
{{input}} -model: openai/gpt-5 +model: openai/gpt-4o # Reliable model that works +temperature: 0.3 # Lower temperature for consistent results +max_completion_tokens: 4000 # Maximum response length diff --git a/src/ai-tools/scripts/ai-tools.ts b/src/ai-tools/scripts/ai-tools.ts index 1be467e02b..fde5245421 100644 --- a/src/ai-tools/scripts/ai-tools.ts +++ b/src/ai-tools/scripts/ai-tools.ts @@ -7,6 +7,8 @@ import ora from 'ora' import { execSync } from 'child_process' import { callModelsApi } from '@/ai-tools/lib/call-models-api' import dotenv from 'dotenv' +import readFrontmatter from '@/frame/lib/read-frontmatter' +import { schema } from '@/frame/lib/frontmatter' dotenv.config({ quiet: true }) const __dirname = path.dirname(fileURLToPath(import.meta.url)) @@ -28,35 +30,92 @@ if (!process.env.GITHUB_TOKEN) { } } -interface EditorType { - description: string +// Dynamically discover available editor types from prompt files +const getAvailableEditorTypes = (): string[] => { + const editorTypes: string[] = [] + + try { + const promptFiles = fs.readdirSync(promptDir) + for (const file of promptFiles) { + if (file.endsWith('.md')) { + const editorName = path.basename(file, '.md') + editorTypes.push(editorName) + } + } + } catch { + console.warn('Could not read prompts directory, using empty editor types') + } + + return editorTypes } -interface EditorTypes { - versioning: EditorType - intro: EditorType -} +const editorTypes = getAvailableEditorTypes() -const editorTypes: EditorTypes = { - versioning: { - description: 'Refine versioning according to simplification guidance.', - }, - intro: { - description: 'Refine intro frontmatter based on SEO and content guidelines.', - }, +// Enhanced recursive markdown file finder with symlink, depth, and root path checks +const findMarkdownFiles = ( + dir: string, + rootDir: string, + depth: number = 0, + maxDepth: number = 20, + visited: Set = new Set(), +): string[] => { + const markdownFiles: string[] = [] + let realDir: string + 
try { + realDir = fs.realpathSync(dir) + } catch { + // If we can't resolve real path, skip this directory + return [] + } + // Prevent escaping root directory + if (!realDir.startsWith(rootDir)) { + return [] + } + // Prevent symlink loops + if (visited.has(realDir)) { + return [] + } + visited.add(realDir) + // Prevent excessive depth + if (depth > maxDepth) { + return [] + } + let entries: fs.Dirent[] + try { + entries = fs.readdirSync(realDir, { withFileTypes: true }) + } catch { + // If we can't read directory, skip + return [] + } + for (const entry of entries) { + const fullPath = path.join(realDir, entry.name) + let realFullPath: string + try { + realFullPath = fs.realpathSync(fullPath) + } catch { + continue + } + // Prevent escaping root directory for files + if (!realFullPath.startsWith(rootDir)) { + continue + } + if (entry.isDirectory()) { + markdownFiles.push(...findMarkdownFiles(realFullPath, rootDir, depth + 1, maxDepth, visited)) + } else if (entry.isFile() && entry.name.endsWith('.md')) { + markdownFiles.push(realFullPath) + } + } + return markdownFiles } const refinementDescriptions = (): string => { - let str = '\n\n' - for (const [ed, edObj] of Object.entries(editorTypes)) { - str += ` ${ed.padEnd(12)} ${edObj.description}\n` - } - return str + return editorTypes.join(', ') } interface CliOptions { verbose?: boolean - refine: Array + prompt?: string[] + refine?: string[] files: string[] write?: boolean } @@ -71,9 +130,10 @@ program '-w, --write', 'Write changes back to the original files (default: output to console only)', ) - .requiredOption( + .option('-p, --prompt ', `Specify one or more prompt type: ${refinementDescriptions()}`) + .option( '-r, --refine ', - `Specify one or more refinement type: ${refinementDescriptions().trimEnd()}\n`, + `(Deprecated: use --prompt) Specify one or more prompt type: ${refinementDescriptions()}`, ) .requiredOption( '-f, --files ', @@ -84,7 +144,30 @@ program const spinner = ora('Starting AI 
review...').start() const files = options.files - const editors = options.refine + // Handle both --prompt and --refine options for backwards compatibility + const prompts = options.prompt || options.refine + + if (!prompts || prompts.length === 0) { + spinner.fail('No prompt type specified. Use --prompt or --refine with one or more types.') + process.exitCode = 1 + return + } + + // Validate that all requested editor types exist + const availableEditors = editorTypes + for (const editor of prompts) { + if (!availableEditors.includes(editor)) { + spinner.fail( + `Unknown prompt type: ${editor}. Available types: ${availableEditors.join(', ')}`, + ) + process.exitCode = 1 + return + } + } + + if (options.verbose) { + console.log(`Processing ${files.length} files with prompts: ${prompts.join(', ')}`) + } for (const file of files) { const filePath = path.resolve(process.cwd(), file) @@ -96,37 +179,101 @@ program continue } - try { - spinner.text = `Reading file: ${file}` - const content = fs.readFileSync(filePath, 'utf8') + // Check if it's a directory + const isDirectory = fs.statSync(filePath).isDirectory() - for (const editorType of editors) { - spinner.text = `Running the AI-powered ${editorType} refinement...` - const answer = await callEditor(editorType, content, options.write || false) - spinner.stop() + for (const editorType of prompts) { + try { + // For other editor types, process individual files + const filesToProcess: string[] = [] - if (options.write) { - // Write the result back to the original file - fs.writeFileSync(filePath, answer, 'utf8') - console.log(`āœ… Updated: ${file}`) + if (isDirectory) { + // Find all markdown files in the directory recursively + // Use process.cwd() as the root directory for safety + const rootDir = fs.realpathSync(process.cwd()) + filesToProcess.push(...findMarkdownFiles(filePath, rootDir)) + + if (filesToProcess.length === 0) { + spinner.warn(`No markdown files found in directory: ${file}`) + continue + } + + 
spinner.text = `Found ${filesToProcess.length} markdown files in ${file}` } else { - // Just output to console (current behavior) - console.log(answer) + filesToProcess.push(filePath) } + + spinner.start() + for (const fileToProcess of filesToProcess) { + const relativePath = path.relative(process.cwd(), fileToProcess) + spinner.text = `Processing: ${relativePath}` + try { + const content = fs.readFileSync(fileToProcess, 'utf8') + const answer = await callEditor( + editorType, + content, + options.write || false, + options.verbose || false, + ) + spinner.stop() + + if (options.write) { + if (editorType === 'intro') { + // For frontmatter addition/modification, merge properties instead of overwriting entire file + const updatedContent = mergeFrontmatterProperties(fileToProcess, answer) + fs.writeFileSync(fileToProcess, updatedContent, 'utf8') + console.log(`āœ… Added frontmatter properties to: ${relativePath}`) + } else { + // For other editor types, write the full result back to the original file + fs.writeFileSync(fileToProcess, answer, 'utf8') + console.log(`āœ… Updated: ${relativePath}`) + } + } else { + // Just output to console (current behavior) + if (filesToProcess.length > 1) { + console.log(`\n=== ${relativePath} ===`) + } + console.log(answer) + } + } catch (err) { + const error = err as Error + spinner.fail(`Error processing ${relativePath}: ${error.message}`) + process.exitCode = 1 + } finally { + spinner.stop() + } + } + } catch (err) { + const error = err as Error + const targetName = path.relative(process.cwd(), filePath) + spinner.fail(`Error processing ${targetName}: ${error.message}`) + process.exitCode = 1 } - } catch (err) { - const error = err as Error - spinner.fail(`Error processing file ${file}: ${error.message}`) - process.exitCode = 1 } } spinner.stop() + + // Exit with appropriate code based on whether any errors occurred + if (process.exitCode) { + process.exit(process.exitCode) + } })() }) program.parse(process.argv) +// Handle graceful 
shutdown +process.on('SIGINT', () => { + console.log('\n\nšŸ›‘ Process interrupted by user') + process.exit(0) +}) + +process.on('SIGTERM', () => { + console.log('\n\nšŸ›‘ Process terminated') + process.exit(0) +}) + interface PromptMessage { content: string role: string @@ -139,26 +286,111 @@ interface PromptData { max_tokens?: number } -async function callEditor( - editorType: keyof EditorTypes, - content: string, - writeMode: boolean, -): Promise { - const markdownPromptPath = path.join(promptDir, `${editorType}.md`) - let markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8') +// Function to merge new frontmatter properties into existing file while preserving formatting +function mergeFrontmatterProperties(filePath: string, newPropertiesYaml: string): string { + const content = fs.readFileSync(filePath, 'utf8') + const parsed = readFrontmatter(content) - // For intro type in write mode, append special instructions - if (editorType === 'intro' && writeMode) { - markdownPrompt += - '\n\n**WRITE MODE**: Output only the complete updated file content with the new intro in the frontmatter. Do not include analysis or explanations - just return the file ready to write.' 
+ if (parsed.errors && parsed.errors.length > 0) { + throw new Error( + `Failed to parse frontmatter: ${parsed.errors.map((e) => e.message).join(', ')}`, + ) } + if (!parsed.content) { + throw new Error('Failed to parse content from file') + } + + try { + // Clean up the AI response - remove markdown code blocks if present + let cleanedYaml = newPropertiesYaml.trim() + cleanedYaml = cleanedYaml.replace(/^```ya?ml\s*\n/i, '') + cleanedYaml = cleanedYaml.replace(/\n```\s*$/i, '') + cleanedYaml = cleanedYaml.trim() + + interface FrontmatterProperties { + intro?: string + [key: string]: unknown + } + const newProperties = yaml.load(cleanedYaml) as FrontmatterProperties + + // Security: Validate against prototype pollution using the official frontmatter schema + const allowedKeys = Object.keys(schema.properties) + + const sanitizedProperties = Object.fromEntries( + Object.entries(newProperties).filter(([key]) => { + if (allowedKeys.includes(key)) { + return true + } + console.warn(`Filtered out potentially unsafe frontmatter key: ${key}`) + return false + }), + ) + + // Merge new properties with existing frontmatter + const mergedData: FrontmatterProperties = { ...parsed.data, ...sanitizedProperties } + + // Manually ensure intro is wrapped in single quotes in the final output + let result = readFrontmatter.stringify(parsed.content, mergedData) + + // Post-process to ensure intro field has single quotes + if (newProperties.intro) { + const introValue = newProperties.intro.toString() + // Replace any quote style on intro with single quotes + result = result.replace( + /^intro:\s*(['"`]?)([^'"`\n\r]+)\1?\s*$/m, + `intro: '${introValue.replace(/'/g, "''")}'`, // Escape single quotes by doubling them + ) + } + return result + } catch (error) { + console.error('Failed to parse AI response as YAML:') + console.error('Raw AI response:', JSON.stringify(newPropertiesYaml)) + throw new Error(`Failed to parse new frontmatter properties: ${error}`) + } +} + +async function 
callEditor( + editorType: string, + content: string, + writeMode: boolean, + verbose = false, +): Promise { + const markdownPromptPath = path.join(promptDir, `${String(editorType)}.md`) + + if (!fs.existsSync(markdownPromptPath)) { + throw new Error(`Prompt file not found: ${markdownPromptPath}`) + } + + const markdownPrompt = fs.readFileSync(markdownPromptPath, 'utf8') + const prompt = yaml.load(fs.readFileSync(promptTemplatePath, 'utf8')) as PromptData + // Validate the prompt template has required properties + if (!prompt.messages || !Array.isArray(prompt.messages)) { + throw new Error('Invalid prompt template: missing or invalid messages array') + } + for (const msg of prompt.messages) { msg.content = msg.content.replace('{{markdownPrompt}}', markdownPrompt) msg.content = msg.content.replace('{{input}}', content) + // Replace writeMode template variable with simple string replacement + msg.content = msg.content.replace( + //g, + writeMode ? '' : '', + ) + msg.content = msg.content.replace( + //g, + writeMode ? '' : '', + ) + msg.content = msg.content.replace( + //g, + writeMode ? '' : '', + ) + + // Remove sections marked for removal + msg.content = msg.content.replace(/[\s\S]*?/g, '') } - return callModelsApi(prompt) + return callModelsApi(prompt, verbose) }