diff --git a/lib/render-content/create-processor.js b/lib/render-content/create-processor.js index c010e0bae3..4adafd2169 100644 --- a/lib/render-content/create-processor.js +++ b/lib/render-content/create-processor.js @@ -18,6 +18,7 @@ import HighlightjsGraphql from 'highlightjs-graphql' import remarkCodeExtra from 'remark-code-extra' import codeHeader from './plugins/code-header.js' import rewriteLocalLinks from './plugins/rewrite-local-links.js' +import rewriteImgSources from './plugins/rewrite-asset-urls.js' import useEnglishHeadings from './plugins/use-english-headings.js' import rewriteLegacyAssetPaths from './plugins/rewrite-legacy-asset-paths.js' import wrapInElement from './plugins/wrap-in-element.js' @@ -45,6 +46,7 @@ export default function createProcessor(context) { .use(raw) .use(rewriteLegacyAssetPaths, context) .use(wrapInElement, { selector: 'ol > li img', wrapper: 'span.procedural-image-wrapper' }) + .use(rewriteImgSources) .use(rewriteLocalLinks, { languageCode: context.currentLanguage, version: context.currentVersion, diff --git a/lib/render-content/plugins/rewrite-asset-urls.js b/lib/render-content/plugins/rewrite-asset-urls.js new file mode 100644 index 0000000000..4808777a2d --- /dev/null +++ b/lib/render-content/plugins/rewrite-asset-urls.js @@ -0,0 +1,52 @@ +import fs from 'fs' + +import { visit } from 'unist-util-visit' + +// Matches any tags with an href that starts with `/assets/` or '/public/' +const matcher = (node) => + node.type === 'element' && + node.tagName === 'img' && + node.properties && + node.properties.src && + (node.properties.src.startsWith('/assets/') || node.properties.src.startsWith('/public/')) + +// Content authors write images like `![Alt](/assets/images/foo.png`, but +// for caching purposes we want to rewrite those so they can be cached +// indefinitely. +export default function rewriteImgSources() { + return (tree) => { + visit(tree, matcher, (node) => { + const newSrc = getNewSrc(node) + if (newSrc) { + node.properties.src = newSrc + } + }) + } +} + +function getNewSrc(node) { + const { src } = node.properties + if (!src.startsWith('/')) return + + try { + const filePath = src.slice(1) + const stats = fs.statSync(filePath) + // The size is not perfect but it's good enough. The size is + // very fast to pick up without needing to do a deep hashing of the + // image's content. It's perfectly possible that someone edits an + // image and it's size doesn't change. Although very unlikely. + // The slightest change to the image is more likely to either increase + // or decrease the image size by at least 1 byte. + // Also, because of this limitation, we're not confident to cache the + // image more than say 24h. But in the unlikely event that someone does + // edit an image and the size doesn't change, there's always the + // escape hatch that you can soft-purge all manual CDN surrogate keys. + if (!stats.size) return + const hash = `${stats.size}` + const split = src.split('/') + split.splice(2, 0, `cb-${hash}`) + return split.join('/') + } catch (err) { + console.warn(`Error trying to get a hash for ${src}`, err) + } +} diff --git a/lib/rewrite-local-links.js b/lib/rewrite-local-links.js index 88ed2a16d4..2deb2e3055 100644 --- a/lib/rewrite-local-links.js +++ b/lib/rewrite-local-links.js @@ -26,8 +26,8 @@ function getNewHref(link, languageCode, version) { const href = link.attr('href') // Exceptions to link rewriting - if (href.startsWith('/assets')) return - if (href.startsWith('/public')) return + if (href.startsWith('/assets/')) return + if (href.startsWith('/public/')) return if (Object.keys(externalRedirects).includes(href)) return let newHref = href diff --git a/middleware/asset-preprocessing.js b/middleware/asset-preprocessing.js new file mode 100644 index 0000000000..4d646cf571 --- /dev/null +++ b/middleware/asset-preprocessing.js @@ -0,0 +1,26 @@ +// This middleware rewrites the URL of requests that contain the +// portion of `/cb-\d+/`. +// "cb" stands for "cache bust". +// There's a Markdown plugin that rewrites all values +// from `` to +// `` for example. +// We're doing this so that we can set a much more aggressive +// Cache-Control for assets and a CDN surrogate-key that doesn't +// soft-purge on every deployment. + +const regex = /\/cb-\d+\// + +export default function assetPreprocessing(req, res, next) { + if (req.path.startsWith('/assets/')) { + // We're only confident enough to set the *manual* surrogate key if the + // asset contains the cache-busting piece. + if (regex.test(req.url)) { + // We remove it so that when `express.static()` runs, it can + // find the file on disk by its original name. + req.url = req.url.replace(regex, '/') + // The Cache-Control is managed by the configuration + // for express.static() later in the middleware. + } + } + return next() +} diff --git a/middleware/index.js b/middleware/index.js index c42d918943..8b4f73d951 100644 --- a/middleware/index.js +++ b/middleware/index.js @@ -14,7 +14,10 @@ import csrf from './csrf.js' import handleCsrfErrors from './handle-csrf-errors.js' import compression from 'compression' import disableCachingOnSafari from './disable-caching-on-safari.js' -import setDefaultFastlySurrogateKey from './set-fastly-surrogate-key.js' +import { + setDefaultFastlySurrogateKey, + setManualFastlySurrogateKey, +} from './set-fastly-surrogate-key.js' import setFastlyCacheHeaders from './set-fastly-cache-headers.js' import catchBadAcceptLanguage from './catch-bad-accept-language.js' import reqUtils from './req-utils.js' @@ -57,6 +60,7 @@ import featuredLinks from './featured-links.js' import learningTrack from './learning-track.js' import next from './next.js' import renderPage from './render-page.js' +import assetPreprocessing from './asset-preprocessing.js' const { NODE_ENV } = process.env const isDevelopment = NODE_ENV === 'development' @@ -95,17 +99,25 @@ export default function (app) { instrument(archivedEnterpriseVersionsAssets, './archived-enterprise-versions-assets') ) ) + // This must come before the express.static('assets') middleware. + app.use(assetPreprocessing) + // By specifying '/assets/cb-' and not just '/assets/' we + // avoid possibly legacy enterprise assets URLs and asset image URLs + // that don't have the cache-busting piece in it. + app.use('/assets/cb-', setManualFastlySurrogateKey) app.use( - '/assets', + '/assets/', express.static('assets', { index: false, etag: false, lastModified: false, - maxAge: '1 day', // Relatively short in case we update images + // Can be aggressive because images inside the content get unique + // URLs with a cache busting prefix. + maxAge: '7 days', }) ) app.use( - '/public', + '/public/', express.static('data/graphql', { index: false, etag: false, diff --git a/middleware/set-fastly-surrogate-key.js b/middleware/set-fastly-surrogate-key.js index b1f6ee5d11..6bfc457423 100644 --- a/middleware/set-fastly-surrogate-key.js +++ b/middleware/set-fastly-surrogate-key.js @@ -23,7 +23,12 @@ export function setFastlySurrogateKey(res, enumKey) { res.set(KEY, enumKey) } -export default function setDefaultFastlySurrogateKey(req, res, next) { +export function setManualFastlySurrogateKey(req, res, next) { + res.set(KEY, SURROGATE_ENUMS.MANUAL) + return next() +} + +export function setDefaultFastlySurrogateKey(req, res, next) { res.set(KEY, SURROGATE_ENUMS.DEFAULT) return next() } diff --git a/script/rendered-content-link-checker.mjs b/script/rendered-content-link-checker.mjs index 2e9288150e..9aa16702ea 100755 --- a/script/rendered-content-link-checker.mjs +++ b/script/rendered-content-link-checker.mjs @@ -240,7 +240,13 @@ async function processPermalink(permalink, page, pageMap, redirects, opts) { if (checkImages) { $('img[src]').each((i, img) => { - const { src } = img.attribs + let { src } = img.attribs + + // Images get a cache-busting prefix injected in the image + // E.g. + // We need to remove that otherwise we can't look up the image + // on disk. + src = src.replace(/\/cb-\d+\//, '/') if (globalImageSrcCheckCache.has(src)) { globalCacheHitCount++ diff --git a/tests/rendering/server.js b/tests/rendering/server.js index 614b6ba4b3..93f46b2a02 100644 --- a/tests/rendering/server.js +++ b/tests/rendering/server.js @@ -375,6 +375,7 @@ describe('server', () => { }) describe('image asset paths', () => { + const localImageCacheBustBasePathRegex = /^\/assets\/cb-\d+\/images\// const localImageBasePath = '/assets/images' const legacyImageBasePath = '/assets/enterprise' const latestEnterprisePath = `/en/enterprise/${enterpriseServerReleases.latest}` @@ -384,7 +385,10 @@ describe('server', () => { const $ = await getDOM( '/en/github/authenticating-to-github/configuring-two-factor-authentication' ) - expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true) + const imageSrc = $('img').first().attr('src') + expect( + localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath) + ).toBe(true) }) test('github articles on GHE have images that point to local assets dir', async () => { @@ -393,7 +397,9 @@ describe('server', () => { ) const imageSrc = $('img').first().attr('src') expect( - imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath) + localImageCacheBustBasePathRegex.test(imageSrc) || + imageSrc.startsWith(localImageBasePath) || + imageSrc.startsWith(legacyImageBasePath) ).toBe(true) }) @@ -403,7 +409,9 @@ describe('server', () => { ) const imageSrc = $('img').first().attr('src') expect( - imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath) + localImageCacheBustBasePathRegex.test(imageSrc) || + imageSrc.startsWith(localImageBasePath) || + imageSrc.startsWith(legacyImageBasePath) ).toBe(true) }) @@ -413,7 +421,9 @@ describe('server', () => { ) const imageSrc = $('img').first().attr('src') expect( - imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath) + localImageCacheBustBasePathRegex.test(imageSrc) || + imageSrc.startsWith(localImageBasePath) || + imageSrc.startsWith(legacyImageBasePath) ).toBe(true) }) @@ -428,14 +438,20 @@ describe('server', () => { const $ = await getDOM( '/en/enterprise-cloud@latest/billing/managing-billing-for-your-github-account/viewing-the-subscription-and-usage-for-your-enterprise-account' ) - expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true) + const imageSrc = $('img').first().attr('src') + expect( + localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath) + ).toBe(true) }) test('admin articles on GHEC have images that point to local assets dir', async () => { const $ = await getDOM( '/en/enterprise-cloud@latest/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise' ) - expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true) + const imageSrc = $('img').first().attr('src') + expect( + localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath) + ).toBe(true) }) test('github articles on GHAE have images that point to local assets dir', async () => { @@ -444,13 +460,18 @@ describe('server', () => { ) const imageSrc = $('img').first().attr('src') expect( - imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath) + localImageCacheBustBasePathRegex.test(imageSrc) || + imageSrc.startsWith(localImageBasePath) || + imageSrc.startsWith(legacyImageBasePath) ).toBe(true) }) test('admin articles on GHAE have images that point to local assets dir', async () => { const $ = await getDOM('/en/github-ae@latest/admin/user-management/managing-dormant-users') - expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true) + const imageSrc = $('img').first().attr('src') + expect( + localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath) + ).toBe(true) }) }) @@ -1003,6 +1024,16 @@ describe('static routes', () => { expect(res.headers['surrogate-key']).toBeTruthy() }) + it('rewrites /assets requests from a cache-busting prefix', async () => { + // The rewrite-asset-urls.js Markdown plugin will do this to img tags. + const res = await get('/assets/cb-123456/images/site/be-social.gif') + expect(res.statusCode).toBe(200) + expect(res.headers['set-cookie']).toBeUndefined() + expect(res.headers['cache-control']).toContain('public') + expect(res.headers['cache-control']).toMatch(/max-age=\d+/) + expect(res.headers['surrogate-key']).toBeTruthy() + }) + it('serves schema files from the /data/graphql directory at /public', async () => { const res = await get('/public/schema.docs.graphql') expect(res.statusCode).toBe(200)