1
0
mirror of synced 2025-12-23 21:07:12 -05:00

Script to audit a top-level content dir (#56208)

This commit is contained in:
Sarah Schneider
2025-06-24 21:10:51 -04:00
committed by GitHub
parent 62ab1ee78c
commit 983b8c2436
2 changed files with 101 additions and 0 deletions

View File

@@ -6,6 +6,10 @@ See documentation below for:
Run this on any GitHub Docs URL to gather a set of metrics about it.
* [docsaudit](#docsaudit)
Run this on a top-level content directory to gather info about its files and output to a CSV.
Print usage info for any script in this directory:
```bash
@@ -70,6 +74,23 @@ To use `docstat` from any location in Terminal, set up a global alias:
```
Now you can run `docstat <url>` from any directory.
## docsaudit
Run `docsaudit` on a top-level content directory to gather data about its files—including title, path, versions, 30d views, and 30d users—and output it to a CSV file.
To see the available options:
```
tsx src/metrics/scripts/docsaudit.js --help
```
Run the script on any top-level content directory:
```
tsx src/metrics/scripts/docsaudit.js <content directory name>
```
For example:
```
tsx src/metrics/scripts/docsaudit.js actions
```
## Future development
Applies to all scripts in this directory:

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env node
import fs from 'fs'
import path from 'path'
import { fileURLToPath } from 'url'
import { Command } from 'commander'
import walkFiles from '#src/workflows/walk-files.ts'
import readFrontmatter from '@/frame/lib/read-frontmatter.js'
import { getKustoClient } from '#src/metrics/lib/kusto-client.js'
import { getDates } from 'src/metrics/lib/dates.js'
import { getViews } from '#src/metrics/queries/views.js'
import { getUsers } from '#src/metrics/queries/users.js'
// ES modules have no built-in __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// Content paths are resolved against the directory the script is run from.
const ROOTDIR = process.cwd()

const program = new Command()

program
  .name('docsaudit')
  .description('Get data about a top-level docs product and output a CSV')
  .argument('<auditDir>', 'Name of the content directory you want to audit, e.g., actions')
  .option('-r, --range <days>', 'Number of days to look back', 30)
  .option('--verbose', 'Display Kusto queries being executed')
  .parse(process.argv)

const options = program.opts()
const [auditDirName] = program.args

const contentDir = path.join(ROOTDIR, 'content')
const auditDir = path.join(contentDir, auditDirName)
// The CSV is written next to this script, not into the current working directory.
const outputFile = path.join(__dirname, `${auditDirName}-audit.csv`)

// Bail out early if the requested directory does not exist under content/.
if (!fs.existsSync(auditDir)) {
  console.error(`${auditDirName} not found`)
  process.exit(1)
}

// Get dates object in format { endDate, startDate, friendlyRange }
const dates = getDates(options.range)

const files = walkFiles(auditDir, ['.md'])
console.log(`Auditing the ${files.length} "${auditDirName}" files. This may take a while.\n`)

// main() is async: handle a rejection explicitly so a failed query or file read
// is reported and the process exits non-zero instead of leaving a floating promise.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})
/**
 * Build the audit CSV for every file in `files` and write it to `outputFile`.
 *
 * For each file, the title and versions come from its frontmatter, the path is
 * made relative to ROOTDIR, and the views and users counts are fetched with
 * getViews/getUsers for the configured date range. Each row is also logged to
 * the console as it is produced.
 *
 * @returns {Promise<void>} Resolves once the CSV file has been written.
 */
async function main() {
  const client = getKustoClient()
  const header = `title,path,versions,${options.range}d views,${options.range}d users`
  console.log(`Assembling data for these CSV columns: ${header}\n`)

  // Get the title, path, and versions from the filesystem
  // Get the views and users from the Kusto API
  const rows = []
  for (const file of files) {
    const contents = await fs.promises.readFile(file)
    const contentPath = path.relative(ROOTDIR, file)
    const { data } = readFrontmatter(contents)

    // JSON.stringify returns undefined when `versions` is missing; fall back to an
    // empty string instead of throwing. Double quotes become single quotes so the
    // versions column stays readable inside a quoted CSV field.
    const versionString = (JSON.stringify(data.versions) ?? '').replaceAll('"', "'")
    const pathToQuery = getPathToQuery(file)

    // Pass null to get all versions (the default if no version is provided)
    const version = null
    // Only pass true for verbose on the first iteration
    const isFirst = rows.length === 0
    const views = await getViews(pathToQuery, client, dates, version, options.verbose && isFirst)
    const users = await getUsers(pathToQuery, client, dates, version, options.verbose && isFirst)

    const csvEntry = [data.title ?? '', contentPath, versionString, views, users]
      .map(toCsvField)
      .join(',')
    console.log(csvEntry)
    rows.push(csvEntry)
  }

  // Header plus one line per file, with no trailing newline.
  fs.writeFileSync(outputFile, [header, ...rows].join('\n'), 'utf8')
  console.log(`Done! Wrote ${outputFile}`)
}

/**
 * Quote a value for use as a CSV field, doubling any embedded double quotes so
 * that a value such as a title containing `"` cannot break the row.
 *
 * @param {unknown} value - Value to render; it is converted with String().
 * @returns {string} The value wrapped in double quotes.
 */
function toCsvField(value) {
  return `"${String(value).replaceAll('"', '""')}"`
}
/**
 * Turn a content file path into the path passed to the views/users queries.
 *
 * The path is made relative to the content directory, then a trailing
 * `/index.md` or `.md` is removed. Both patterns are anchored to the end of
 * the string so a `.md` occurring earlier in the path (for example inside a
 * directory name) is left alone.
 *
 * @param {string} file - Path to a Markdown file under the content directory.
 * @returns {string} The relative path without its Markdown suffix, e.g. `actions/foo`.
 */
function getPathToQuery(file) {
  return (
    path
      .relative(contentDir, file)
      // Use forward slashes regardless of platform so the suffix patterns below match.
      // NOTE(review): presumably the queries expect URL-style paths; confirm.
      .split(path.sep)
      .join('/')
      .replace(/\/index\.md$/, '')
      .replace(/\.md$/, '')
  )
}