From 3ce72b8d58f9e2bbe3debc92181ec5e6278143bf Mon Sep 17 00:00:00 2001 From: Peter Bengtsson Date: Mon, 8 May 2023 11:13:13 -0400 Subject: [PATCH] Cache robots.txt longer (#36907) --- middleware/robots.js | 4 ++++ tests/rendering/robots-txt.js | 30 +++++++++++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/middleware/robots.js b/middleware/robots.js index 53fa6f8abc..9e58864f03 100644 --- a/middleware/robots.js +++ b/middleware/robots.js @@ -1,3 +1,5 @@ +import { defaultCacheControl } from './cache-control.js' + const defaultResponse = 'User-agent: *' const disallowAll = `User-agent: * @@ -8,6 +10,8 @@ export default function robots(req, res, next) { res.type('text/plain') + defaultCacheControl(res) + // only include robots.txt when it's our production domain and adding localhost for robots-txt.js test if (req.hostname === 'docs.github.com' || req.hostname === '127.0.0.1') { return res.send(defaultResponse) diff --git a/tests/rendering/robots-txt.js b/tests/rendering/robots-txt.js index 6c6e8cffed..1a8010c2bc 100644 --- a/tests/rendering/robots-txt.js +++ b/tests/rendering/robots-txt.js @@ -1,6 +1,11 @@ -import robotsParser from 'robots-parser' -import { get } from '../helpers/e2etest.js' import { expect, jest } from '@jest/globals' +import robotsParser from 'robots-parser' + +import { + SURROGATE_ENUMS, + makeLanguageSurrogateKey, +} from '../../middleware/set-fastly-surrogate-key.js' +import { get } from '../helpers/e2etest.js' describe('robots.txt', () => { jest.setTimeout(5 * 60 * 1000) @@ -16,7 +21,7 @@ describe('robots.txt', () => { robots = robotsParser('https://docs.github.com/robots.txt', res.text) }) - it('allows indexing of the homepage and English content', async () => { + test('allows indexing of the homepage and English content', async () => { expect(robots.isAllowed('https://docs.github.com/')).toBe(true) expect(robots.isAllowed('https://docs.github.com/en')).toBe(true) expect( @@ -24,7 +29,7 @@ describe('robots.txt', () => { ).toBe(true) }) - it('disallows indexing of azurecontainer.io domains', async () => { + test('disallows indexing of azurecontainer.io domains', async () => { const res = await get('/robots.txt', { headers: { host: 'docs-internal-preview-12345-asdfz.azurecontainer.io', @@ -33,12 +38,15 @@ describe('robots.txt', () => { expect(res.body).toEqual('User-agent: *\nDisallow: /') }) - it('does not have duplicate lines', () => { - const lines = new Set() - for (const line of res.body.split('\n')) { - if (/^\s*$/.test(line)) continue - expect(lines.has(line)).toBe(false) - lines.add(line) - } + test('does not have duplicate lines', () => { + expect(res.body.split('\n').length).toBe(new Set(res.body.split('\n')).size) + }) + + test('is cached by headers', () => { + expect(res.headers['cache-control']).toMatch(/public, max-age=/) + + const surrogateKeySplit = res.headers['surrogate-key'].split(/\s/g) + expect(surrogateKeySplit.includes(SURROGATE_ENUMS.DEFAULT)).toBeTruthy() + expect(surrogateKeySplit.includes(makeLanguageSurrogateKey('en'))).toBeTruthy() }) })