From 7505613264521fd83cf5cd156c84ff11388e03de Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Mon, 6 Mar 2023 14:44:59 -0500 Subject: [PATCH] Reloading the site tree (#34877) --- lib/create-tree.js | 89 ++++++++++++++++++++++++++++++--------- middleware/index.js | 2 + middleware/reload-tree.js | 72 +++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+), 19 deletions(-) create mode 100644 middleware/reload-tree.js diff --git a/lib/create-tree.js b/lib/create-tree.js index 80b1d8ba68..719a04fef7 100644 --- a/lib/create-tree.js +++ b/lib/create-tree.js @@ -3,38 +3,68 @@ import fs from 'fs/promises' import Page from './page.js' -export default async function createTree(originalPath, rootPath) { +export default async function createTree(originalPath, rootPath, previousTree) { const basePath = rootPath || originalPath // On recursive runs, this is processing page.children items in `/` format. // If the path exists as is, assume this is a directory with a child index.md. // Otherwise, assume it's a child .md file and add `.md` to the path. let filepath + let mtime + // This kills two birds with one stone. We (attempt to) read it as a file, + // to find out if it's a directory or a file and whence we know that + // we also collect its modification time. try { - await fs.access(originalPath) - filepath = `${originalPath}/index.md` - } catch { filepath = `${originalPath}.md` + mtime = await getMtime(filepath) + } catch (error) { + if (error.code !== 'ENOENT') { + throw error + } + filepath = `${originalPath}/index.md` + // Note, if this throws, that's quite fine. It usually means that + // there's an `index.md` whose `children:` entry lists something that + // doesn't exist on disk. So the writer who tries to preview the + // page will see the error and it's hopefully clear what's actually + // wrong. 
+ try { + mtime = await getMtime(filepath) + } catch (error) { + if (error.code === 'ENOENT' && filepath.split(path.sep).includes('early-access')) { + // Do not throw an error if Early Access is not available. + console.warn( + `${filepath} could not be turned into a Page, but is ignored because it's early-access` + ) + return + } + throw error + } } const relativePath = filepath.replace(`${basePath}/`, '') - // Initialize the Page! This is where the file reads happen. - const page = await Page.init({ - basePath, - relativePath, - languageCode: 'en', - }) + // Reading in a file from disk is slow and best avoided if we can be + // certain it isn't necessary. If the previous tree is known and that + // tree's page node's `mtime` hasn't changed, we can use that instead. + let page + if (previousTree && previousTree.page.mtime === mtime) { + // A save! We can use the same exact Page instance from the previous + // tree because the assumption is that since the `.md` file it was + // created from hasn't changed (on disk) the instance object wouldn't + // change. + page = previousTree.page + } else { + // Either the previous tree doesn't exist yet or the modification time + // of the file on disk has changed. + page = await Page.init({ + basePath, + relativePath, + languageCode: 'en', + mtime, + }) + } if (!page) { - // Do not throw an error if Early Access is not available. 
- if (relativePath.startsWith('early-access')) { - console.warn( - `${relativePath} could not be turned into a Page, but is ignore because it's early-access` - ) - return - } - throw Error(`Cannot initialize page for ${filepath}`) } @@ -49,7 +79,12 @@ export default async function createTree(originalPath, rootPath) { item.childPages = ( await Promise.all( item.page.children.map( - async (child) => await createTree(path.posix.join(originalPath, child), basePath) + async (child, i) => + await createTree( + path.posix.join(originalPath, child), + basePath, + previousTree && previousTree.childPages[i] + ) ) ) ).filter(Boolean) @@ -58,6 +93,22 @@ export default async function createTree(originalPath, rootPath) { return item } +async function getMtime(filePath) { + // Use mtimeMs, which is a regular floating point number, instead of the + // mtime which is a Date based on that same number. + // Otherwise, if we use the Date instances, we have to compare + // them using `oneDate.getTime() === anotherDate.getTime()`. + const { mtimeMs } = await fs.stat(filePath) + // The `mtimeMs` is a number like `1669827766942.7954` + // From the docs: + // "The timestamp indicating the last time this file was modified expressed + // in milliseconds since the POSIX Epoch." + // But the number isn't actually all that important. We just need it to + // later be able to know if it changed. We round it to the nearest + // millisecond. 
+ return Math.round(mtimeMs) +} + function assertUniqueChildren(page) { if (page.children.length !== new Set(page.children).size) { const count = {} diff --git a/middleware/index.js b/middleware/index.js index eb5deef1d3..8e37a657ac 100644 --- a/middleware/index.js +++ b/middleware/index.js @@ -20,6 +20,7 @@ import handleErrors from './handle-errors.js' import handleInvalidPaths from './handle-invalid-paths.js' import handleNextDataPath from './handle-next-data-path.js' import detectLanguage from './detect-language.js' +import reloadTree from './reload-tree.js' import context from './context.js' import shortVersions from './contextualizers/short-versions.js' import languageCodeRedirects from './redirects/language-code-redirects.js' @@ -212,6 +213,7 @@ export default function (app) { // *** Config and context for redirects *** app.use(reqUtils) // Must come before events app.use(instrument(detectLanguage, './detect-language')) // Must come before context, breadcrumbs, find-page, handle-errors, homepages + app.use(asyncMiddleware(instrument(reloadTree, './reload-tree'))) // Must come before context app.use(asyncMiddleware(instrument(context, './context'))) // Must come before early-access-*, handle-redirects app.use(instrument(shortVersions, './contextualizers/short-versions')) // Support version shorthands diff --git a/middleware/reload-tree.js b/middleware/reload-tree.js new file mode 100644 index 0000000000..9752805702 --- /dev/null +++ b/middleware/reload-tree.js @@ -0,0 +1,72 @@ +/** + * This exists for local previewing. Only. + * We load in the entire tree on startup, then that's used for things like + * sidebars and breadcrumbs and landing pages and ToC pages (and possibly + * more). + * When an individual page is requested, we always reload it from disk + * in case it has changed. But that's not feasible with all 1k+ pages. 
+ * + * The core of this middleware calls `createTree()` but by passing the + * optional previous tree so that within `createTree` it can opt to + * re-use those that haven't changed on disk. + * + * The intention here is so that things like sidebars can refresh + * without having to restart the entire server. + */ + +import path from 'path' + +import languages, { languageKeys } from '../lib/languages.js' +import createTree from '../lib/create-tree.js' +import warmServer from '../lib/warm-server.js' +import { loadSiteTree, loadPages, loadPageMap } from '../lib/page-data.js' +import loadRedirects from '../lib/redirects/precompile.js' + +const languagePrefixRegex = new RegExp(`^/(${languageKeys.join('|')})(/|$)`) +const englishPrefixRegex = /^\/en(\/|$)/ + +const isDev = process.env.NODE_ENV === 'development' + +export default async function reloadTree(req, res, next) { + if (!isDev) return next() + // Filter out things like `/will/redirect` or `/_next/data/...` + if (!languagePrefixRegex.test(req.pagePath)) return next() + // We only bother if the loaded URL is something `/en/...` + if (!englishPrefixRegex.test(req.pagePath)) return next() + + const warmed = await warmServer() + // For all the real English content, this usually takes about 30-60ms on + // an Intel MacbookPro. + const before = getMtimes(warmed.unversionedTree.en) + warmed.unversionedTree.en = await createTree( + path.join(languages.en.dir, 'content'), + undefined, + warmed.unversionedTree.en + ) + const after = getMtimes(warmed.unversionedTree.en) + // The next couple of operations are much slower (in total) than + // refreshing the tree. So we want to know if the tree changed before + // bothering. + // If refreshing of the `.en` part of the `unversionedTree` takes 40ms + // then the following operations take about 140ms. 
+ if (before !== after) { + warmed.siteTree = await loadSiteTree(warmed.unversionedTree) + warmed.pageList = await loadPages(warmed.unversionedTree) + warmed.pageMap = await loadPageMap(warmed.pageList) + warmed.redirects = await loadRedirects(warmed.pageList) + } + + return next() +} + +// Given a tree, return a number that represents the mtimes for all pages +// in the tree. +// You can use this to compute it before and after the tree is (maybe) +// mutated and if the numbers *change* you can know the tree changed. +function getMtimes(tree) { + let mtimes = tree.page.mtime + for (const child of tree.childPages || []) { + mtimes += getMtimes(child) + } + return mtimes +}