1
0
mirror of synced 2025-12-20 10:28:40 -05:00

cache asset images more aggressively (#23553)

* cache asset images more aggressively

* more careful about which gets the manual surrogate key

* fix rendered-content-link-checker script too

* feedbacked
This commit is contained in:
Peter Bengtsson
2021-12-10 08:01:48 -05:00
committed by GitHub
parent dc1a510110
commit ae3dadfc66
8 changed files with 150 additions and 16 deletions

View File

@@ -18,6 +18,7 @@ import HighlightjsGraphql from 'highlightjs-graphql'
import remarkCodeExtra from 'remark-code-extra'
import codeHeader from './plugins/code-header.js'
import rewriteLocalLinks from './plugins/rewrite-local-links.js'
import rewriteImgSources from './plugins/rewrite-asset-urls.js'
import useEnglishHeadings from './plugins/use-english-headings.js'
import rewriteLegacyAssetPaths from './plugins/rewrite-legacy-asset-paths.js'
import wrapInElement from './plugins/wrap-in-element.js'
@@ -45,6 +46,7 @@ export default function createProcessor(context) {
.use(raw)
.use(rewriteLegacyAssetPaths, context)
.use(wrapInElement, { selector: 'ol > li img', wrapper: 'span.procedural-image-wrapper' })
.use(rewriteImgSources)
.use(rewriteLocalLinks, {
languageCode: context.currentLanguage,
version: context.currentVersion,

View File

@@ -0,0 +1,52 @@
import fs from 'fs'
import { visit } from 'unist-util-visit'
// Matches any <img> tags with an href that starts with `/assets/` or '/public/'
const matcher = (node) =>
node.type === 'element' &&
node.tagName === 'img' &&
node.properties &&
node.properties.src &&
(node.properties.src.startsWith('/assets/') || node.properties.src.startsWith('/public/'))
// Content authors write images like `![Alt](/assets/images/foo.png`, but
// for caching purposes we want to rewrite those so they can be cached
// indefinitely.
export default function rewriteImgSources() {
return (tree) => {
visit(tree, matcher, (node) => {
const newSrc = getNewSrc(node)
if (newSrc) {
node.properties.src = newSrc
}
})
}
}
function getNewSrc(node) {
const { src } = node.properties
if (!src.startsWith('/')) return
try {
const filePath = src.slice(1)
const stats = fs.statSync(filePath)
// The size is not perfect but it's good enough. The size is
// very fast to pick up without needing to do a deep hashing of the
// image's content. It's perfectly possible that someone edits an
// image and it's size doesn't change. Although very unlikely.
// The slightest change to the image is more likely to either increase
// or decrease the image size by at least 1 byte.
// Also, because of this limitation, we're not confident to cache the
// image more than say 24h. But in the unlikely event that someone does
// edit an image and the size doesn't change, there's always the
// escape hatch that you can soft-purge all manual CDN surrogate keys.
if (!stats.size) return
const hash = `${stats.size}`
const split = src.split('/')
split.splice(2, 0, `cb-${hash}`)
return split.join('/')
} catch (err) {
console.warn(`Error trying to get a hash for ${src}`, err)
}
}

View File

@@ -26,8 +26,8 @@ function getNewHref(link, languageCode, version) {
const href = link.attr('href')
// Exceptions to link rewriting
if (href.startsWith('/assets')) return
if (href.startsWith('/public')) return
if (href.startsWith('/assets/')) return
if (href.startsWith('/public/')) return
if (Object.keys(externalRedirects).includes(href)) return
let newHref = href

View File

@@ -0,0 +1,26 @@
// This middleware rewrites the URL of requests that contain the
// portion of `/cb-\d+/`.
// "cb" stands for "cache bust".
// There's a Markdown plugin that rewrites all <img src> values
// from `<img src="/assets/foo/bar.png">` to
// `<img src="/assets/cb-123467/foo/bar.png">` for example.
// We're doing this so that we can set a much more aggressive
// Cache-Control for assets and a CDN surrogate-key that doesn't
// soft-purge on every deployment.
const regex = /\/cb-\d+\//
export default function assetPreprocessing(req, res, next) {
if (req.path.startsWith('/assets/')) {
// We're only confident enough to set the *manual* surrogate key if the
// asset contains the cache-busting piece.
if (regex.test(req.url)) {
// We remove it so that when `express.static()` runs, it can
// find the file on disk by its original name.
req.url = req.url.replace(regex, '/')
// The Cache-Control is managed by the configuration
// for express.static() later in the middleware.
}
}
return next()
}

View File

@@ -14,7 +14,10 @@ import csrf from './csrf.js'
import handleCsrfErrors from './handle-csrf-errors.js'
import compression from 'compression'
import disableCachingOnSafari from './disable-caching-on-safari.js'
import setDefaultFastlySurrogateKey from './set-fastly-surrogate-key.js'
import {
setDefaultFastlySurrogateKey,
setManualFastlySurrogateKey,
} from './set-fastly-surrogate-key.js'
import setFastlyCacheHeaders from './set-fastly-cache-headers.js'
import catchBadAcceptLanguage from './catch-bad-accept-language.js'
import reqUtils from './req-utils.js'
@@ -57,6 +60,7 @@ import featuredLinks from './featured-links.js'
import learningTrack from './learning-track.js'
import next from './next.js'
import renderPage from './render-page.js'
import assetPreprocessing from './asset-preprocessing.js'
const { NODE_ENV } = process.env
const isDevelopment = NODE_ENV === 'development'
@@ -95,17 +99,25 @@ export default function (app) {
instrument(archivedEnterpriseVersionsAssets, './archived-enterprise-versions-assets')
)
)
// This must come before the express.static('assets') middleware.
app.use(assetPreprocessing)
// By specifying '/assets/cb-' and not just '/assets/' we
// avoid possibly legacy enterprise assets URLs and asset image URLs
// that don't have the cache-busting piece in it.
app.use('/assets/cb-', setManualFastlySurrogateKey)
app.use(
'/assets',
'/assets/',
express.static('assets', {
index: false,
etag: false,
lastModified: false,
maxAge: '1 day', // Relatively short in case we update images
// Can be aggressive because images inside the content get unique
// URLs with a cache busting prefix.
maxAge: '7 days',
})
)
app.use(
'/public',
'/public/',
express.static('data/graphql', {
index: false,
etag: false,

View File

@@ -23,7 +23,12 @@ export function setFastlySurrogateKey(res, enumKey) {
res.set(KEY, enumKey)
}
export default function setDefaultFastlySurrogateKey(req, res, next) {
export function setManualFastlySurrogateKey(req, res, next) {
res.set(KEY, SURROGATE_ENUMS.MANUAL)
return next()
}
export function setDefaultFastlySurrogateKey(req, res, next) {
res.set(KEY, SURROGATE_ENUMS.DEFAULT)
return next()
}

View File

@@ -240,7 +240,13 @@ async function processPermalink(permalink, page, pageMap, redirects, opts) {
if (checkImages) {
$('img[src]').each((i, img) => {
const { src } = img.attribs
let { src } = img.attribs
// Images get a cache-busting prefix injected in the image
// E.g. <img src="/assets/cb-123456/foo/bar.png">
// We need to remove that otherwise we can't look up the image
// on disk.
src = src.replace(/\/cb-\d+\//, '/')
if (globalImageSrcCheckCache.has(src)) {
globalCacheHitCount++

View File

@@ -375,6 +375,7 @@ describe('server', () => {
})
describe('image asset paths', () => {
const localImageCacheBustBasePathRegex = /^\/assets\/cb-\d+\/images\//
const localImageBasePath = '/assets/images'
const legacyImageBasePath = '/assets/enterprise'
const latestEnterprisePath = `/en/enterprise/${enterpriseServerReleases.latest}`
@@ -384,7 +385,10 @@ describe('server', () => {
const $ = await getDOM(
'/en/github/authenticating-to-github/configuring-two-factor-authentication'
)
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
const imageSrc = $('img').first().attr('src')
expect(
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
).toBe(true)
})
test('github articles on GHE have images that point to local assets dir', async () => {
@@ -393,7 +397,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
localImageCacheBustBasePathRegex.test(imageSrc) ||
imageSrc.startsWith(localImageBasePath) ||
imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -403,7 +409,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
localImageCacheBustBasePathRegex.test(imageSrc) ||
imageSrc.startsWith(localImageBasePath) ||
imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -413,7 +421,9 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
localImageCacheBustBasePathRegex.test(imageSrc) ||
imageSrc.startsWith(localImageBasePath) ||
imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
@@ -428,14 +438,20 @@ describe('server', () => {
const $ = await getDOM(
'/en/enterprise-cloud@latest/billing/managing-billing-for-your-github-account/viewing-the-subscription-and-usage-for-your-enterprise-account'
)
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
const imageSrc = $('img').first().attr('src')
expect(
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
).toBe(true)
})
test('admin articles on GHEC have images that point to local assets dir', async () => {
const $ = await getDOM(
'/en/enterprise-cloud@latest/admin/configuration/configuring-your-enterprise/verifying-or-approving-a-domain-for-your-enterprise'
)
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
const imageSrc = $('img').first().attr('src')
expect(
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
).toBe(true)
})
test('github articles on GHAE have images that point to local assets dir', async () => {
@@ -444,13 +460,18 @@ describe('server', () => {
)
const imageSrc = $('img').first().attr('src')
expect(
imageSrc.startsWith(localImageBasePath) || imageSrc.startsWith(legacyImageBasePath)
localImageCacheBustBasePathRegex.test(imageSrc) ||
imageSrc.startsWith(localImageBasePath) ||
imageSrc.startsWith(legacyImageBasePath)
).toBe(true)
})
test('admin articles on GHAE have images that point to local assets dir', async () => {
const $ = await getDOM('/en/github-ae@latest/admin/user-management/managing-dormant-users')
expect($('img').first().attr('src').startsWith(localImageBasePath)).toBe(true)
const imageSrc = $('img').first().attr('src')
expect(
localImageCacheBustBasePathRegex.test(imageSrc) || imageSrc.startsWith(localImageBasePath)
).toBe(true)
})
})
@@ -1003,6 +1024,16 @@ describe('static routes', () => {
expect(res.headers['surrogate-key']).toBeTruthy()
})
it('rewrites /assets requests from a cache-busting prefix', async () => {
// The rewrite-asset-urls.js Markdown plugin will do this to img tags.
const res = await get('/assets/cb-123456/images/site/be-social.gif')
expect(res.statusCode).toBe(200)
expect(res.headers['set-cookie']).toBeUndefined()
expect(res.headers['cache-control']).toContain('public')
expect(res.headers['cache-control']).toMatch(/max-age=\d+/)
expect(res.headers['surrogate-key']).toBeTruthy()
})
it('serves schema files from the /data/graphql directory at /public', async () => {
const res = await get('/public/schema.docs.graphql')
expect(res.statusCode).toBe(200)