1
0
mirror of synced 2026-01-22 18:03:38 -05:00

Script to turn all documents to a JSON file (#50260)

Co-authored-by: docs-bot <docs-bot@users.noreply.github.com>
This commit is contained in:
Peter Bengtsson
2024-05-16 08:18:53 -04:00
committed by GitHub
parent a30be9603a
commit f6a95d5b79
4 changed files with 298 additions and 0 deletions

View File

@@ -0,0 +1,137 @@
/**
* You specify one or more languages and versions, and this script
* will output a JSON file with the metadata needed.
* You run it with:
*
* npm run all-documents -- -o /tmp/all-documents.json
*
* By default, it will do free-pro-team, enterprise-cloud, and whatever
* the latest enterprise-server is. You can specify versions with: --version
* For example:
*
* npm run all-documents -- -v free-pro-team@latest -v ghes-3.12
*
* By default it will include all languages, but you can specify
* with --language
*
* npm run all-documents -- -l en -l de
*
* For debugging purposes, because there are so *many* documents you can
* apply a filter by URL matching, for example:
*
* npm run all-documents -- -f get-started/using-github
*
* This will only include documents whose URL contains the string
* 'get-started/using-github'.
*
* If you don't specify an output file (the --output flag or -o for short),
* it will write the JSON to 'all-documents.json' in the current directory.
*
* By default, each document includes the fields: title, shortTitle, intro, url.
* You can instead specify only the fields you want. For example:
*
* npm run all-documents -- --field url --field title
*
* Now the JSON will look like this:
*
* ...
* {"title": "Some title", "url": "/some-url"}
* ...
*/
import { writeFileSync, statSync } from 'fs'
import { program, Option } from 'commander'
import { languageKeys } from '@/languages/lib/languages.js'
import { allVersions } from '@/versions/lib/all-versions.js'
import { allDocuments, POSSIBLE_FIELDS, type AllDocument } from './lib'
// Full version identifiers, e.g. enterprise-server@3.12, free-pro-team@latest, etc.
const fullVersions = Object.keys(allVersions)
// Versions used when no --version is given: every version without numbered
// releases, plus any numbered version whose current release is its latest release.
const defaultVersions: string[] = []
// Maps each version's openApiVersionName (accepted as a short alias on the
// command line) to its full version identifier.
const shortAlias = new Map<string, string>()
for (const [fullName, details] of Object.entries(allVersions)) {
  shortAlias.set(details.openApiVersionName, fullName)
  const isDefault =
    !details.hasNumberedReleases || details.latestRelease === details.currentRelease
  if (isDefault) {
    defaultVersions.push(fullName)
  }
}
// Command-line interface definition. --language, --version and --field accept
// multiple values and are restricted to known choices; --version accepts both
// full version identifiers and their short aliases.
program
  .description("Generate a JSON output of all documents' metadata")
  .addOption(
    new Option('-l, --language <language...>', 'Specific languages(s)').choices(languageKeys),
  )
  .addOption(
    new Option('-v, --version <version...>', 'Specific version(s)').choices([
      ...fullVersions,
      ...shortAlias.keys(),
    ]),
  )
  .addOption(
    new Option('--field <field...>', 'Fields to include for each document (multiple)').choices(
      POSSIBLE_FIELDS,
    ),
  )
  .option('-f, --filter <search>', 'Only for matched files (most for debugging)')
  .option('-o, --output <output-file>', 'Output file', 'all-documents.json')
  .action(main)

// `main` is an async action handler, so the program is parsed with
// parseAsync(); plain parse() would drop the promise the handler returns.
// A rejection is reported explicitly and ends the process with a failure code.
program.parseAsync(process.argv).catch((error: unknown) => {
  console.error(error)
  process.exit(1)
})
// Command-line options as passed to `main` by commander.
type Options = {
// Values of -v/--version; each may be a full version identifier or a short alias.
version?: string[]
// Values of -l/--language.
language?: string[]
// Values of --field.
field?: string[]
// Value of -o/--output; the option is declared with the default 'all-documents.json'.
output: string
// Value of -f/--filter.
filter?: string
}
/**
 * CLI action: resolves the requested languages, versions and fields, gathers
 * the matching documents, writes them as pretty-printed JSON to the output
 * file, and logs the file size and how long the gathering took.
 */
async function main(options: Options) {
  const { filter, output: outFile } = options
  const languages = options.language || languageKeys
  // Short aliases are translated to full version identifiers; anything else
  // is passed through unchanged.
  const versions: string[] = (options.version || defaultVersions).map(
    (requested) => shortAlias.get(requested) ?? requested,
  )
  const fields = options.field || POSSIBLE_FIELDS

  // Only the document gathering is timed, not the file write.
  const startedAt = Date.now()
  const gathered = await allDocuments({ languages, versions, filter, fields })
  const elapsedMs = Date.now() - startedAt

  const payload: AllDocument[] = gathered.map(({ documents, ...meta }) => ({
    ...meta,
    documents,
  }))
  writeFileSync(outFile, JSON.stringify(payload, null, 2))

  const seconds = elapsedMs / 1000
  const { size } = statSync(outFile)
  console.log(`Wrote ${outFile} (${fileSize(size)}). Took ${seconds.toFixed(1)} seconds.`)
}
/**
 * Format a byte count as a short human-readable string.
 *
 * @param bytes - Size in bytes.
 * @returns The size with one decimal in `Mb` or `Kb` (1024-based), or the
 *   plain number of bytes when it is below 1024.
 */
const fileSize = (bytes: number): string => {
  const KB = 1024
  const MB = KB * KB
  // Inclusive comparisons so that exact unit boundaries roll over to the
  // larger unit (1048576 bytes is "1.0Mb", not "1024.0Kb").
  if (bytes >= MB) return `${(bytes / MB).toFixed(1)}Mb`
  if (bytes >= KB) return `${(bytes / KB).toFixed(1)}Kb`
  return `${bytes} bytes`
}

View File

@@ -0,0 +1,118 @@
import contextualize from '@/frame/middleware/context/context.js'
import features from '@/versions/middleware/features.js'
import shortVersions from '@/versions/middleware/short-versions.js'
import warmServer from '@/frame/lib/warm-server.js'
// Names of the per-document fields that allDocuments() knows how to populate.
export const POSSIBLE_FIELDS = ['title', 'shortTitle', 'intro', 'url']
// Metadata of a single document. Each property is only populated when its
// name is among the requested fields; otherwise it is left undefined.
type Document = {
title?: string
// null when the field was requested but the page has no shortTitle.
shortTitle?: string | null
intro?: string
// The href of the permalink the document was built from.
url?: string
}
// All documents collected for one combination of language and version.
export type AllDocument = {
version: string
language: string
documents: Document[]
}
// The permalink properties this module relies on.
type Permalink = {
// Language the resulting document is grouped under.
languageCode: string
// Compared against the requested versions and used to group documents.
pageVersion: string
title: string
// Matched against the optional filter and emitted as the document's url.
href: string
}
// Shape expected of the entries in the warmed site's pageList.
type Page = {
permalinks: Permalink[]
fullPath: string
title: string
shortTitle?: string
intro: string
// Compared against the requested languages to skip unwanted pages.
languageCode: string
documentType: string
// Called with a property name, the request context and { textOnly: true }
// to obtain the rendered value of 'title', 'shortTitle' or 'intro'.
renderProp: (prop: string, context: any, opts: any) => Promise<string>
}
// Input of allDocuments().
type Options = {
// Language codes to include; also passed to warmServer().
languages: string[]
// Page versions to include.
versions: string[]
// Names of the document fields to populate.
fields: string[]
// When set (and non-empty), only permalinks whose href contains this string are included.
filter?: string
}
/**
 * Gather the requested metadata fields for every page permalink that matches
 * the given languages, versions and optional href filter.
 *
 * @param options - Languages and versions to include, the fields to populate,
 *   and an optional substring that a permalink's href must contain.
 * @returns One entry per language/version combination that had at least one
 *   matching permalink, each carrying the documents of that combination.
 */
export async function allDocuments(options: Options): Promise<AllDocument[]> {
  const { filter, versions, languages, fields } = options
  const warmed = await warmServer(languages)
  const pageList: Page[] = warmed.pageList

  const wantTitle = fields.includes('title')
  const wantShortTitle = fields.includes('shortTitle')
  const wantIntro = fields.includes('intro')
  const wantUrl = fields.includes('url')

  type ByVersion = Map<string, Document[]>
  const grouped = new Map<string, ByVersion>()

  for (const page of pageList) {
    if (!languages.includes(page.languageCode)) continue

    for (const permalink of page.permalinks) {
      const matchesFilter = !filter || permalink.href.includes(filter)
      if (!matchesFilter || !versions.includes(permalink.pageVersion)) continue

      // Stand-in request/response objects so the context middleware can be
      // run for this permalink outside of a real HTTP request.
      const urlPath = permalink.href
      const context: any = {}
      const req = {
        path: urlPath,
        language: permalink.languageCode,
        pagePath: urlPath,
        cookies: {},
        query: {},
        context,
      }
      const res = {}
      const next = () => {}

      await contextualize(req, res, next)
      await shortVersions(req, res, next)
      req.context.page = page
      await features(req, res, next)

      let title: string | undefined
      if (wantTitle) {
        title = await page.renderProp('title', req.context, { textOnly: true })
      }

      // null marks "requested, but the page has no shortTitle".
      let shortTitle: string | null | undefined
      if (wantShortTitle) {
        shortTitle = page.shortTitle
          ? await page.renderProp('shortTitle', req.context, { textOnly: true })
          : null
      }

      let intro: string | undefined
      if (wantIntro) {
        intro = await page.renderProp('intro', req.context, { textOnly: true })
      }

      let url: string | undefined
      if (wantUrl) {
        url = permalink.href
      }

      let byVersion = grouped.get(permalink.languageCode)
      if (!byVersion) {
        byVersion = new Map()
        grouped.set(permalink.languageCode, byVersion)
      }
      let bucket = byVersion.get(permalink.pageVersion)
      if (!bucket) {
        bucket = []
        byVersion.set(permalink.pageVersion, bucket)
      }
      bucket.push({ title, shortTitle, intro, url })
    }
  }

  // Flatten the language -> version -> documents grouping into a list.
  const result: AllDocument[] = []
  grouped.forEach((byVersion, language) => {
    byVersion.forEach((documents, version) => {
      result.push({ version, language, documents })
    })
  })
  return result
}