294 lines
9.9 KiB
TypeScript
294 lines
9.9 KiB
TypeScript
import express from 'express'
|
|
import type { NextFunction, RequestHandler, Response } from 'express'
|
|
|
|
import type { ExtendedRequest, Page, Context, Permalink } from '@/types'
|
|
import statsd from '@/observability/lib/statsd.js'
|
|
import { defaultCacheControl } from '@/frame/middleware/cache-control.js'
|
|
import catchMiddlewareError from '@/observability/middleware/catch-middleware-error.js'
|
|
import {
|
|
SURROGATE_ENUMS,
|
|
setFastlySurrogateKey,
|
|
makeLanguageSurrogateKey,
|
|
} from '@/frame/middleware/set-fastly-surrogate-key.js'
|
|
import shortVersions from '@/versions/middleware/short-versions.js'
|
|
import contextualize from '@/frame/middleware/context/context'
|
|
import features from '@/versions/middleware/features.js'
|
|
import getRedirect from '@/redirects/lib/get-redirect.js'
|
|
import { isArchivedVersionByPath } from '@/archives/lib/is-archived-version.js'
|
|
import { readCompressedJsonFile } from '@/frame/lib/read-json-file.js'
|
|
|
|
// Express router for the pageinfo API. The route handlers further down are
// registered on it and it is this module's default export.
const router = express.Router()
|
|
|
|
/**
 * Where the pre-computed page info file is expected to be found on disk.
 *
 * The file is optional: when it does not exist it is ignored and the
 * page info is computed every time instead.
 *
 * Note! This constant is exported only so that the script
 * scripts/precompute-pageinfo.ts can import it.
 */
export const CACHE_FILE_PATH = '.pageinfo-cache.json.br'
|
|
|
|
// Shape that the result of `isArchivedVersionByPath()` is treated as in
// this file. Both fields are optional.
type ArchivedVersion = Partial<{
  // Truthy when the pathname belongs to an archived version
  isArchived: boolean
  // The version that was asked for; used to build the archived page title
  requestedVersion: string
}>
|
|
|
|
// State that the middlewares in this file attach to the request as
// `req.pageinfo`.
type PageInfoRequestState = {
  // The pathname from the query string; later overwritten with the
  // resolved pathname (trailing slashes removed, redirects followed).
  pathname: string
  // The page found for `pathname`, if there is one
  page?: Page
  // Only set when the pathname was not a known page and was therefore
  // checked against the archived versions.
  archived?: ArchivedVersion
}

// An `ExtendedRequest` that additionally carries the `pageinfo` state.
type ExtendedRequestWithPageInfo = ExtendedRequest & {
  pageinfo: PageInfoRequestState
}
|
|
|
|
/**
 * Validates the `pathname` query string parameter.
 *
 * Responds with a 400 and a JSON `{ error }` body when the parameter is
 * missing, repeated, not a plain string, blank, does not start with `/`
 * or contains whitespace. Otherwise it stores the value as
 * `req.pageinfo.pathname` and calls `next()`.
 */
const validationMiddleware = (
  req: ExtendedRequestWithPageInfo,
  res: Response,
  next: NextFunction,
) => {
  // The query string is user-controlled input. Depending on the query
  // parser it can be an array (`?pathname=a&pathname=b`) or a nested
  // object (`?pathname[x]=y`), so don't assume it's a string.
  const pathname: unknown = req.query.pathname
  if (!pathname) {
    return res.status(400).json({ error: `No 'pathname' query` })
  }
  if (Array.isArray(pathname)) {
    return res.status(400).json({ error: "Multiple 'pathname' keys" })
  }
  if (typeof pathname !== 'string') {
    // Without this check, the string methods below would throw a
    // TypeError and the client would get a 500 instead of a 400.
    return res.status(400).json({ error: `'pathname' query has to be a string` })
  }
  if (!pathname.trim()) {
    return res.status(400).json({ error: `'pathname' query empty` })
  }
  if (!pathname.startsWith('/')) {
    return res.status(400).json({ error: `'pathname' has to start with /` })
  }
  if (/\s/.test(pathname)) {
    return res.status(400).json({ error: `'pathname' cannot contain whitespace` })
  }
  req.pageinfo = { pathname }
  return next()
}
|
|
|
|
/**
 * Resolves `req.pageinfo.pathname` to a page.
 *
 * The `findPage` middleware utility can't be used here because we need to
 * know when the pathname is a redirect, so that the final `pathname`
 * matches one of the page's permalinks. That matters when rendering
 * translations: if a fallback is needed, the correct equivalent English
 * page has to be known.
 *
 * On completion `req.pageinfo.pathname` holds the resolved pathname,
 * `req.pageinfo.page` holds the matching page (or undefined), and
 * `req.pageinfo.archived` is set if the pathname was not a known page.
 *
 * Throws if the request has not been contextualized yet.
 */
const pageinfoMiddleware = (
  req: ExtendedRequestWithPageInfo,
  res: Response,
  next: NextFunction,
) => {
  let resolved = req.pageinfo.pathname

  const { context } = req
  if (!context || !context.pages || !context.redirects) {
    throw new Error('request not yet contextualized')
  }
  const { pages, redirects } = context

  // Like the `handle-redirects.js` middleware, start by dropping any
  // trailing slashes (but never reduce the pathname to nothing).
  let end = resolved.length
  while (end > 1 && resolved[end - 1] === '/') {
    end--
  }
  resolved = resolved.slice(0, end)

  // A request for `/` is a redirect that is handled outside of
  // getRedirect(), so it's taken care of here.
  if (resolved === '/') {
    resolved = `/${context.currentLanguage}`
  }

  const isKnownPage = resolved in pages
  if (!isKnownPage) {
    // An unknown pathname can be a redirect, an archived enterprise
    // version, or both. To mimic the order of our middleware, redirects
    // are only consulted when the pathname is not an archived version.
    const archived = isArchivedVersionByPath(resolved) as ArchivedVersion
    req.pageinfo.archived = archived
    if (!archived.isArchived) {
      const target = getRedirect(resolved, { pages, redirects })
      if (target) {
        resolved = target
      }
    }
  }

  // The pathname may have changed above (slashes, `/`, or a redirect)
  req.pageinfo.pathname = resolved
  // This is undefined when the pathname is not a page
  req.pageinfo.page = pages[resolved]

  return next()
}
|
|
|
|
/**
 * Computes the page info for a page: its title, its intro and the title
 * of the product it belongs to.
 *
 * A minimal stand-in request is run through the `contextualize`,
 * `shortVersions` and `features` middleware to build the context that the
 * page's properties are rendered with.
 *
 * @param page the page to compute the info for
 * @param pathname used as the `path` and `pagePath` of the stand-in request
 * @returns `{ title, intro, product }`, where `product` is '' if none of
 * the page's permalinks leads to a known root page
 */
export async function getPageInfo(page: Page, pathname: string) {
  const res = {}
  const next = () => {}
  const emptyContext: Context = {}
  const renderingReq = {
    path: pathname,
    language: page.languageCode,
    pagePath: pathname,
    cookies: {},
    context: emptyContext,
  }

  await contextualize(renderingReq as ExtendedRequest, res as Response, next)
  await shortVersions(renderingReq, res, next)
  // The page is set after the two middleware above and before `features`
  renderingReq.context.page = page
  features(renderingReq as ExtendedRequest, res as Response, next)

  // Read the context back from the request rather than reusing the
  // object it was created with.
  const context = renderingReq.context

  const title = await page.renderProp('title', context, { textOnly: true })
  const intro = await page.renderProp('intro', context, { textOnly: true })

  // The product is the first permalink whose root (the first 3 path
  // segments for free-pro-team@latest, otherwise the first 4) is a page.
  let product = ''
  for (const permalink of page.permalinks) {
    const segments = permalink.href.split('/')
    const depth = permalink.pageVersion === 'free-pro-team@latest' ? 3 : 4
    const rootHref = segments.slice(0, depth).join('/')
    if (!context.pages) throw new Error('context.pages not yet set')
    const rootPage = context.pages[rootHref]
    if (rootPage) {
      product = await getProductPageInfo(rootPage, context)
      break
    }
  }

  return { title, intro, product }
}
|
|
|
|
// Product titles that have already been rendered, keyed by
// `<relativePath>:<currentVersion>:<currentLanguage>`.
const _productPageCache: Record<string, string> = {}

/**
 * Returns the title of a product page, preferring its `shortTitle` and
 * falling back to its `title`.
 *
 * The result is cached because it's repeated a lot: what determines the
 * product title is the language and the version, and many pages share the
 * same product.
 */
async function getProductPageInfo(page: Page, context: Context) {
  const cacheKey = `${page.relativePath}:${context.currentVersion}:${context.currentLanguage}`
  if (cacheKey in _productPageCache) {
    return _productPageCache[cacheKey]
  }

  const shortTitle = await page.renderProp('shortTitle', context, {
    textOnly: true,
  })
  // Only render the full title when there is no short one
  const title =
    shortTitle ||
    (await page.renderProp('title', context, {
      textOnly: true,
    }))
  _productPageCache[cacheKey] = title
  return title
}
|
|
|
|
// A single page info entry.
type CachedPageInfoEntry = {
  title: string
  intro: string
  product: string
  // Label describing how a lookup was served; set by the cache lookup
  // code in this file, not required to be present in the entry.
  cacheInfo?: string
}

// Page info entries keyed by URL.
type CachedPageInfo = Record<string, CachedPageInfoEntry>
|
|
|
|
// Contents of the file at CACHE_FILE_PATH. It's `null` until the first
// lookup attempts to read the file, and `{}` if the file doesn't exist.
let _cache: CachedPageInfo | null = null

/**
 * Returns the page info for `pathname`, taken from the pre-computed cache
 * file when it has an entry and computed with `getPageInfo()` otherwise.
 *
 * @returns the page info plus a `cacheInfo` label. On the call that reads
 * the cache file the label is 'initial-load' (file read) or 'initial-fail'
 * (file not found); on later calls it is 'hit' or 'miss'.
 * @throws any error from reading the cache file, other than the file not
 * existing. In that case the read is attempted again on the next call.
 */
async function getPageInfoFromCache(page: Page, pathname: string) {
  let cacheInfo = ''
  if (_cache === null) {
    try {
      _cache = readCompressedJsonFile(CACHE_FILE_PATH) as CachedPageInfo
      cacheInfo = 'initial-load'
    } catch (error: unknown) {
      // A missing file is the only failure that's expected and ignored.
      // Everything else, including thrown non-Error values, is rethrown.
      const isMissingFile =
        error instanceof Error && 'code' in error && error.code === 'ENOENT'
      if (!isMissingFile) {
        throw error
      }
      cacheInfo = 'initial-fail'
      _cache = {}
    }
  }

  const cached = _cache[pathname]
  if (!cacheInfo) {
    cacheInfo = cached ? 'hit' : 'miss'
  }

  // You might wonder; why do we not store the computed information
  // into the `_cache` from here?
  // The short answer is; it won't be used again.
  // In production, which is the only place where performance matters,
  // a HTTP GET request will only happen once per deployment. That's
  // because the CDN will cache it until the next deployment (which is
  // followed by a CDN purge).
  // In development (local preview), the performance doesn't really matter.
  // In CI, we use the caching because the CI runs
  // `npm run precompute-pageinfo` right before it runs vitest tests.
  const info = cached || (await getPageInfo(page, pathname))

  // Return a copy so that the per-request `cacheInfo` label is never
  // written into the shared entry inside `_cache`.
  return { ...info, cacheInfo }
}
|
|
|
|
// GET /v1?pathname=... responds with `{ info: { title, intro, product } }`
// for the page that the pathname resolves to.
router.get(
  '/v1',
  validationMiddleware as RequestHandler,
  pageinfoMiddleware as RequestHandler,
  catchMiddlewareError(async function pageInfo(req: ExtendedRequestWithPageInfo, res: Response) {
    // Remember, the `pageinfoMiddleware` follows redirects if the
    // `pathname` is one (e.g. /en/articles/foo or /articles or
    // /en/enterprise-server@latest/foo/bar). So by now, the pathname
    // should be one of the page's valid permalinks.
    const { archived, page, pathname } = req.pageinfo

    // Archived versions get a fixed response built from the version only
    if (archived?.isArchived) {
      defaultCacheControl(res)
      return res.json({
        info: {
          intro: '',
          title: `GitHub Enterprise Server ${archived.requestedVersion} Help Documentation`,
          product: 'GitHub Enterprise Server',
        },
      })
    }

    if (!page) {
      return res.status(400).json({ error: `No page found for '${pathname}'` })
    }

    const isPermalink = page.permalinks.some((permalink: Permalink) => permalink.href === pathname)
    if (!isPermalink) {
      throw new Error(`pathname '${pathname}' not one of the page's permalinks`)
    }

    // `cacheInfo` is only for the metrics; it's not part of the response
    const { cacheInfo, ...info } = await getPageInfoFromCache(page, pathname)

    statsd.increment('pageinfo.lookup', 1, [
      // According to https://docs.datadoghq.com/getting_started/tagging/#define-tags
      // the max length of a tag is 200 characters. Most of ours are
      // shorter than that but we truncate just to be safe.
      `pathname:${pathname}`.slice(0, 200),
      `language:${page.languageCode}`,
      `cache:${cacheInfo}`,
    ])

    defaultCacheControl(res)

    // The `Surrogate-Key` header needs the correct language surrogate key
    // bit. By default it's derived from the pathname, but `/api/**` URLs
    // don't have a language (other than the default 'en'). Setting it
    // from the page means these URLs are cached in Fastly by language,
    // which we need for the staggered purge.
    const surrogateKey = `${SURROGATE_ENUMS.DEFAULT} ${makeLanguageSurrogateKey(page.languageCode)}`
    setFastlySurrogateKey(res, surrogateKey, true)

    res.status(200).json({ info })
  }),
)
|
|
|
|
// The unversioned URL is an alias for the latest version: respond with a
// 307 redirect to the same URL with `/pageinfo` replaced by `/pageinfo/v1`.
router.get('/', (req, res) => {
  const versionedUrl = req.originalUrl.replace('/pageinfo', '/pageinfo/v1')
  res.redirect(307, versionedUrl)
})
|
|
|
|
// The router, with the `/v1` and `/` routes registered, is the default export.
export default router
|