diff --git a/lib/render-content/create-processor.js b/lib/render-content/create-processor.js
index c010e0bae3..4adafd2169 100644
--- a/lib/render-content/create-processor.js
+++ b/lib/render-content/create-processor.js
@@ -18,6 +18,7 @@ import HighlightjsGraphql from 'highlightjs-graphql'
import remarkCodeExtra from 'remark-code-extra'
import codeHeader from './plugins/code-header.js'
import rewriteLocalLinks from './plugins/rewrite-local-links.js'
+import rewriteImgSources from './plugins/rewrite-asset-urls.js'
import useEnglishHeadings from './plugins/use-english-headings.js'
import rewriteLegacyAssetPaths from './plugins/rewrite-legacy-asset-paths.js'
import wrapInElement from './plugins/wrap-in-element.js'
@@ -45,6 +46,7 @@ export default function createProcessor(context) {
.use(raw)
.use(rewriteLegacyAssetPaths, context)
.use(wrapInElement, { selector: 'ol > li img', wrapper: 'span.procedural-image-wrapper' })
+ .use(rewriteImgSources)
.use(rewriteLocalLinks, {
languageCode: context.currentLanguage,
version: context.currentVersion,
diff --git a/lib/render-content/plugins/rewrite-asset-urls.js b/lib/render-content/plugins/rewrite-asset-urls.js
new file mode 100644
index 0000000000..4808777a2d
--- /dev/null
+++ b/lib/render-content/plugins/rewrite-asset-urls.js
@@ -0,0 +1,52 @@
+import fs from 'fs'
+
+import { visit } from 'unist-util-visit'
+
+// Matches any
tags with an href that starts with `/assets/` or '/public/'
+const matcher = (node) =>
+ node.type === 'element' &&
+ node.tagName === 'img' &&
+ node.properties &&
+ node.properties.src &&
+ (node.properties.src.startsWith('/assets/') || node.properties.src.startsWith('/public/'))
+
+// Content authors write images like ` {
+ return (tree) => {
+ visit(tree, matcher, (node) => {
+ const newSrc = getNewSrc(node)
+ if (newSrc) {
+ node.properties.src = newSrc
+ }
+ })
+ }
+}
+
+function getNewSrc(node) {
+ const { src } = node.properties
+ if (!src.startsWith('/')) return
+
+ try {
+ const filePath = src.slice(1)
+ const stats = fs.statSync(filePath)
+ // The size is not perfect but it's good enough. The size is
+ // very fast to pick up without needing to do a deep hashing of the
+ // image's content. It's perfectly possible that someone edits an
+ // image and it's size doesn't change. Although very unlikely.
+ // The slightest change to the image is more likely to either increase
+ // or decrease the image size by at least 1 byte.
+ // Also, because of this limitation, we're not confident to cache the
+ // image more than say 24h. But in the unlikely event that someone does
+ // edit an image and the size doesn't change, there's always the
+ // escape hatch that you can soft-purge all manual CDN surrogate keys.
+ if (!stats.size) return
+ const hash = `${stats.size}`
+ const split = src.split('/')
+ split.splice(2, 0, `cb-${hash}`)
+ return split.join('/')
+ } catch (err) {
+ console.warn(`Error trying to get a hash for ${src}`, err)
+ }
+}
diff --git a/lib/rewrite-local-links.js b/lib/rewrite-local-links.js
index 88ed2a16d4..2deb2e3055 100644
--- a/lib/rewrite-local-links.js
+++ b/lib/rewrite-local-links.js
@@ -26,8 +26,8 @@ function getNewHref(link, languageCode, version) {
const href = link.attr('href')
// Exceptions to link rewriting
- if (href.startsWith('/assets')) return
- if (href.startsWith('/public')) return
+ if (href.startsWith('/assets/')) return
+ if (href.startsWith('/public/')) return
if (Object.keys(externalRedirects).includes(href)) return
let newHref = href
diff --git a/middleware/asset-preprocessing.js b/middleware/asset-preprocessing.js
new file mode 100644
index 0000000000..4d646cf571
--- /dev/null
+++ b/middleware/asset-preprocessing.js
@@ -0,0 +1,26 @@
+// This middleware rewrites the URL of requests that contain the
+// portion of `/cb-\d+/`.
+// "cb" stands for "cache bust".
+// There's a Markdown plugin that rewrites all
values
+// from `
` to
+// `
` for example.
+// We're doing this so that we can set a much more aggressive
+// Cache-Control for assets and a CDN surrogate-key that doesn't
+// soft-purge on every deployment.
+
+const regex = /\/cb-\d+\//
+
+export default function assetPreprocessing(req, res, next) {
+ if (req.path.startsWith('/assets/')) {
+ // We're only confident enough to set the *manual* surrogate key if the
+ // asset contains the cache-busting piece.
+ if (regex.test(req.url)) {
+ // We remove it so that when `express.static()` runs, it can
+ // find the file on disk by its original name.
+ req.url = req.url.replace(regex, '/')
+ // The Cache-Control is managed by the configuration
+ // for express.static() later in the middleware.
+ }
+ }
+ return next()
+}
diff --git a/middleware/index.js b/middleware/index.js
index c42d918943..8b4f73d951 100644
--- a/middleware/index.js
+++ b/middleware/index.js
@@ -14,7 +14,10 @@ import csrf from './csrf.js'
import handleCsrfErrors from './handle-csrf-errors.js'
import compression from 'compression'
import disableCachingOnSafari from './disable-caching-on-safari.js'
-import setDefaultFastlySurrogateKey from './set-fastly-surrogate-key.js'
+import {
+ setDefaultFastlySurrogateKey,
+ setManualFastlySurrogateKey,
+} from './set-fastly-surrogate-key.js'
import setFastlyCacheHeaders from './set-fastly-cache-headers.js'
import catchBadAcceptLanguage from './catch-bad-accept-language.js'
import reqUtils from './req-utils.js'
@@ -57,6 +60,7 @@ import featuredLinks from './featured-links.js'
import learningTrack from './learning-track.js'
import next from './next.js'
import renderPage from './render-page.js'
+import assetPreprocessing from './asset-preprocessing.js'
const { NODE_ENV } = process.env
const isDevelopment = NODE_ENV === 'development'
@@ -95,17 +99,25 @@ export default function (app) {
instrument(archivedEnterpriseVersionsAssets, './archived-enterprise-versions-assets')
)
)
+ // This must come before the express.static('assets') middleware.
+ app.use(assetPreprocessing)
+ // By specifying '/assets/cb-' and not just '/assets/' we
+ // avoid possibly legacy enterprise assets URLs and asset image URLs
+ // that don't have the cache-busting piece in it.
+ app.use('/assets/cb-', setManualFastlySurrogateKey)
app.use(
- '/assets',
+ '/assets/',
express.static('assets', {
index: false,
etag: false,
lastModified: false,
- maxAge: '1 day', // Relatively short in case we update images
+ // Can be aggressive because images inside the content get unique
+ // URLs with a cache busting prefix.
+ maxAge: '7 days',
})
)
app.use(
- '/public',
+ '/public/',
express.static('data/graphql', {
index: false,
etag: false,
diff --git a/middleware/set-fastly-surrogate-key.js b/middleware/set-fastly-surrogate-key.js
index b1f6ee5d11..6bfc457423 100644
--- a/middleware/set-fastly-surrogate-key.js
+++ b/middleware/set-fastly-surrogate-key.js
@@ -23,7 +23,12 @@ export function setFastlySurrogateKey(res, enumKey) {
res.set(KEY, enumKey)
}
-export default function setDefaultFastlySurrogateKey(req, res, next) {
+export function setManualFastlySurrogateKey(req, res, next) {
+ res.set(KEY, SURROGATE_ENUMS.MANUAL)
+ return next()
+}
+
+export function setDefaultFastlySurrogateKey(req, res, next) {
res.set(KEY, SURROGATE_ENUMS.DEFAULT)
return next()
}
diff --git a/script/rendered-content-link-checker.mjs b/script/rendered-content-link-checker.mjs
index 2e9288150e..9aa16702ea 100755
--- a/script/rendered-content-link-checker.mjs
+++ b/script/rendered-content-link-checker.mjs
@@ -240,7 +240,13 @@ async function processPermalink(permalink, page, pageMap, redirects, opts) {
if (checkImages) {
$('img[src]').each((i, img) => {
- const { src } = img.attribs
+ let { src } = img.attribs
+
+ // Images get a cache-busting prefix injected in the image
+ // E.g.
+ // We need to remove that otherwise we can't look up the image
+ // on disk.
+ src = src.replace(/\/cb-\d+\//, '/')
if (globalImageSrcCheckCache.has(src)) {
globalCacheHitCount++
diff --git a/tests/rendering/server.js b/tests/rendering/server.js
index 614b6ba4b3..93f46b2a02 100644
--- a/tests/rendering/server.js
+++ b/tests/rendering/server.js
@@ -375,6 +375,7 @@ describe('server', () => {
})
describe('image asset paths', () => {
+ const localImageCacheBustBasePathRegex = /^\/assets\/cb-\d+\/images\//
const localImageBasePath = '/assets/images'
const legacyImageBasePath = '/assets/enterprise'
const latestEnterprisePath = `/en/enterprise/${enterpriseServerReleases.latest}`
@@ -384,7 +385,10 @@ describe('server', () => {
const $ = await getDOM(
'/en/github/authenticating-to-github/configuring-two-factor-authentication'
)
- expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
+ const imageSrc = $('img').first().attr('src')
+ expect(
+ localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
+ ).toBe(true)
})
test('github articles on GHE have images that point to local assets dir', async () => {
@@ -393,7 +397,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
- imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
+ localImageCacheBustBasePathRegex.test(imageSrc) ||
+ imageSrc.startsWith(localImageBasePath) ||
+ imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -403,7 +409,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
- imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
+ localImageCacheBustBasePathRegex.test(imageSrc) ||
+ imageSrc.startsWith(localImageBasePath) ||
+ imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -413,7 +421,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
- imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
+ localImageCacheBustBasePathRegex.test(imageSrc) ||
+ imageSrc.startsWith(localImageBasePath) ||
+ imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -428,14 +438,20 @@ describe('server', () => {
const $ = await getDOM(
'/en/enterprise-cloud@latest/billing/managing-billing-for-your-github-account/viewing-the-subscription-and-usage-for-your-enterprise-account'
)
- expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
+ const imageSrc = $('img').first().attr('src')
+ expect(
+ localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
+ ).toBe(true)
})
test('admin articles on GHEC have images that point to local assets dir', async () => {
const $ = await getDOM(
'/en/enterprise-cloud@latest/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise'
)
- expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
+ const imageSrc = $('img').first().attr('src')
+ expect(
+ localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
+ ).toBe(true)
})
test('github articles on GHAE have images that point to local assets dir', async () => {
@@ -444,13 +460,18 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
- imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
+ localImageCacheBustBasePathRegex.test(imageSrc) ||
+ imageSrc.startsWith(localImageBasePath) ||
+ imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
test('admin articles on GHAE have images that point to local assets dir', async () => {
const $ = await getDOM('/en/github-ae@latest/admin/user-management/managing-dormant-users')
- expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
+ const imageSrc = $('img').first().attr('src')
+ expect(
+ localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
+ ).toBe(true)
})
})
@@ -1003,6 +1024,16 @@ describe('static routes', () => {
expect(res.headers['surrogate-key']).toBeTruthy()
})
+ it('rewrites /assets requests from a cache-busting prefix', async () => {
+ // The rewrite-asset-urls.js Markdown plugin will do this to img tags.
+ const res = await get('/assets/cb-123456/images/site/be-social.gif')
+ expect(res.statusCode).toBe(200)
+ expect(res.headers['set-cookie']).toBeUndefined()
+ expect(res.headers['cache-control']).toContain('public')
+ expect(res.headers['cache-control']).toMatch(/max-age=\d+/)
+ expect(res.headers['surrogate-key']).toBeTruthy()
+ })
+
it('serves schema files from the /data/graphql directory at /public', async () => {
const res = await get('/public/schema.docs.graphql')
expect(res.statusCode).toBe(200)