diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f344bf82c7..add116b530 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,6 +72,7 @@ jobs: - rest - search - shielding + - tracking # - tests # - tools - versions diff --git a/src/frame/middleware/cache-control.js b/src/frame/middleware/cache-control.js index 537206041b..ad255d8b16 100644 --- a/src/frame/middleware/cache-control.js +++ b/src/frame/middleware/cache-control.js @@ -22,12 +22,12 @@ function cacheControlFactory( !maxAge && 'no-store', maxAge >= 60 * 60 && `stale-while-revalidate=${60 * 60}`, maxAge >= 60 * 60 && `stale-if-error=${24 * 60 * 60}`, - maxAgeZero && 'max-age=0', + (maxAgeZero || maxAge === 0) && 'max-age=0', ] .filter(Boolean) .join(', ') return (res) => { - if (process.env.NODE_ENV !== 'production' && res.hasHeader('set-cookie')) { + if (process.env.NODE_ENV !== 'production' && res.hasHeader('set-cookie') && maxAge) { console.warn( "You can't set a >0 cache-control header AND set-cookie or else the CDN will never respect the cache-control.", ) diff --git a/src/frame/middleware/index.js b/src/frame/middleware/index.js index c8b7179408..bc43f15438 100644 --- a/src/frame/middleware/index.js +++ b/src/frame/middleware/index.js @@ -64,6 +64,7 @@ import mockVaPortal from './mock-va-portal.js' import dynamicAssets from '#src/assets/middleware/dynamic-assets.js' import contextualizeSearch from '#src/search/middleware/contextualize.js' import shielding from '#src/shielding/middleware/index.js' +import tracking from '#src/tracking/middleware/index.js' const { DEPLOYMENT_ENV, NODE_ENV } = process.env const isTest = NODE_ENV === 'test' || process.env.GITHUB_ACTIONS === 'true' @@ -209,6 +210,9 @@ export default function (app) { app.use(mockVaPortal) // FOR TESTING. 
} + // ** Possible early exits after cookies ** + app.use(tracking) + // *** Headers *** app.set('etag', false) // We will manage our own ETags if desired diff --git a/src/shielding/middleware/handle-invalid-query-strings.js b/src/shielding/middleware/handle-invalid-query-strings.js index 24be542219..4ae1730720 100644 --- a/src/shielding/middleware/handle-invalid-query-strings.js +++ b/src/shielding/middleware/handle-invalid-query-strings.js @@ -29,6 +29,8 @@ const RECOGNIZED_KEYS_BY_ANY = new Set([ 'query', // The drop-downs on "Webhook events and payloads" 'actionType', + // Used by the tracking middleware + 'ghdomain', ]) export default function handleInvalidQuerystrings(req, res, next) { diff --git a/src/tracking/README.md b/src/tracking/README.md new file mode 100644 index 0000000000..31f5b718de --- /dev/null +++ b/src/tracking/README.md @@ -0,0 +1,20 @@ +# Tracking + +## Overview + +This is about recording information from inbound links, which helps with "tracking". + +For example, if you arrive on Docs with `?ghdomain=example.ghe.com` we +can pick that up and put it in a cookie so that the user's content, when +they view it, can say `curl https://example.ghe.com/api/v1` instead +of the stock `curl https://HOSTNAME/api/v1`. + +## How it works + +For certain query string keys, we "snatch them up" and redirect +to the same URL you were on, but with that query string key removed. +And in the 302 Found response, we might include a `set-cookie` header. 
+
+## Notes
+
+none
diff --git a/src/tracking/middleware/handle-query-strings.js b/src/tracking/middleware/handle-query-strings.js
new file mode 100644
index 0000000000..cd4aae36d2
--- /dev/null
+++ b/src/tracking/middleware/handle-query-strings.js
@@ -0,0 +1,91 @@
+import statsd from '#src/observability/lib/statsd.js'
+import { noCacheControl } from '#src/frame/middleware/cache-control.js'
+
+const STATSD_KEY = 'middleware.handle_tracking_querystrings'
+
+// Exported for the sake of end-to-end tests
+export const DOMAIN_QUERY_PARAM = 'ghdomain'
+export const MAX_DOMAINS_SAVED = 3
+
+const DOMAIN_COOKIE_AGE_MS = 365 * 24 * 3600 * 1000
+export const DOMAIN_COOKIE_NAME = 'github_domains'
+
+/**
+ * Express middleware that consumes the `ghdomain` query string parameter.
+ *
+ * When the parameter holds a single value, that value (lowercased and trimmed)
+ * is put first in the `github_domains` cookie, a comma-separated list capped
+ * at MAX_DOMAINS_SAVED entries. An empty value clears an existing cookie.
+ * Either way the client is then redirected (302) to the same path without the
+ * parameter. A repeated or structured parameter is answered with a 400.
+ * Requests without the parameter, an empty parameter with no existing cookie,
+ * and `/_next/` paths are passed on to the next middleware untouched.
+ */
+export default function handleTrackingQueryStrings(req, res, next) {
+  if (req.path.startsWith('/_next/')) {
+    return next()
+  }
+
+  const rawDomain = req.query[DOMAIN_QUERY_PARAM]
+  if (!rawDomain && rawDomain !== '') {
+    return next()
+  }
+
+  // A repeated key (`?ghdomain=a&ghdomain=b`) arrives as an array and, depending
+  // on the query parser, bracket syntax (`?ghdomain[x]=y`) can arrive as an
+  // object. Neither is a single domain, so refuse both.
+  if (typeof rawDomain !== 'string') {
+    res.status(400).send('can only be one')
+
+    const tags = [`key:${DOMAIN_QUERY_PARAM}`, 'domain:_multiple_']
+    statsd.increment(STATSD_KEY, 1, tags)
+
+    return
+  }
+
+  const domain = rawDomain.toLowerCase().trim()
+
+  const previousDomains = (req.cookies[DOMAIN_COOKIE_NAME] || '')
+    .split(',')
+    .map((x) => x.trim().toLowerCase())
+    .filter(Boolean)
+
+  // Nothing to store and nothing to clear.
+  if (!domain && !previousDomains.length) return next()
+
+  if (domain) {
+    // Most recent domain first, without duplicates, capped in length.
+    const newCookieValue = [domain, ...previousDomains.filter((x) => x !== domain)]
+      .slice(0, MAX_DOMAINS_SAVED)
+      .join(',')
+    res.cookie(DOMAIN_COOKIE_NAME, newCookieValue, {
+      maxAge: DOMAIN_COOKIE_AGE_MS,
+      httpOnly: false,
+    })
+  } else {
+    res.clearCookie(DOMAIN_COOKIE_NAME)
+  }
+
+  // Rebuild the remaining query string from the raw URL rather than from
+  // `req.query`, so that other repeated keys (`?a=1&a=2`) are kept as they
+  // were instead of being collapsed into one comma-joined value.
+  const queryStart = req.originalUrl.indexOf('?')
+  const searchParams = new URLSearchParams(
+    queryStart === -1 ? '' : req.originalUrl.slice(queryStart + 1),
+  )
+  searchParams.delete(DOMAIN_QUERY_PARAM)
+
+  noCacheControl(res)
+
+  // Collapse leading slashes and backslashes so that the `location` header can
+  // never be read as a protocol-relative URL (`//example.com`) on another host.
+  let newURL = req.path.replace(/^[/\\]+/, '/')
+  const remainingQuery = searchParams.toString()
+  if (remainingQuery) {
+    newURL += `?${remainingQuery}`
+  }
+  res.redirect(302, newURL)
+
+  const tags = [`key:${DOMAIN_QUERY_PARAM}`, `domain:${domain || '_empty_'}`]
+  statsd.increment(STATSD_KEY, 1, tags)
+}
diff --git a/src/tracking/middleware/index.js b/src/tracking/middleware/index.js
new file mode 100644
index 0000000000..60ac9cd993
--- /dev/null
+++ b/src/tracking/middleware/index.js
@@ -0,0 +1,9 @@
+import express from 'express'
+
+import handleTrackingQueryStrings from './handle-query-strings.js'
+
+const router = express.Router()
+
+router.use(handleTrackingQueryStrings)
+
+export default router
diff --git a/src/tracking/tests/handle-query-string.js b/src/tracking/tests/handle-query-string.js
new file mode 100644
index 0000000000..515162b539
--- /dev/null
+++ b/src/tracking/tests/handle-query-string.js
@@ -0,0 +1,116 @@
+import { get } from '#src/tests/helpers/e2etest.js'
+import { expect } from '@jest/globals'
+import {
+  DOMAIN_QUERY_PARAM,
+  DOMAIN_COOKIE_NAME,
+  MAX_DOMAINS_SAVED,
+} from '../middleware/handle-query-strings.js'
+
+describe('setting a cookie', () => {
+  test('on home page', async () => {
+    const res = await get(`/en?${DOMAIN_QUERY_PARAM}=acme.example.com`)
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=acme.example.com/)
+    expect(res.headers.location).toBe('/en')
+    expect(res.headers['cache-control']).toMatch(/private/)
+    expect(res.headers['cache-control']).toMatch(/max-age=0/)
+  })
+
+  test('with other query string things', async () => {
+    const res = await get(`/en?${DOMAIN_QUERY_PARAM}=acme.example.com&foo=bar`)
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=acme.example.com/)
+    expect(res.headers.location).toBe('/en?foo=bar')
+  })
+
+  test('always lowercase', async () => {
+    const res = await get(`/en?${DOMAIN_QUERY_PARAM}=Acme.example.COM`)
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=acme.example.com/)
+  })
+
+  test('on root page', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=acme.example.com`)
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=acme.example.com/)
+    expect(res.headers.location).toBe('/')
+  })
+
+  test('empty value does nothing if nothing previous', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=`)
+    expect(res.statusCode).toBe(302)
+    expect(res.headers['set-cookie']).toBeUndefined()
+  })
+
+  test('empty value, when trimmed, does nothing if nothing previous', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=%20`)
+    expect(res.statusCode).toBe(302)
+    expect(res.headers['set-cookie']).toBeUndefined()
+  })
+
+  test('empty value resets previous cookie', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=`, {
+      headers: {
+        cookie: `${DOMAIN_COOKIE_NAME}=acme.example.com`,
+      },
+    })
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=;/)
+  })
+
+  test('append with previous', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=next.example.com`, {
+      headers: {
+        cookie: `${DOMAIN_COOKIE_NAME}=previous.example.com`,
+      },
+    })
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    // %2C is a comma
+    expect(setCookie).toMatch(/github_domains=next.example.com%2Cprevious.example.com;/)
+  })
+
+  test('append with too many', async () => {
+    let cookie = ''
+    for (const letter of Array.from('abcdef')) {
+      const next = `${letter}.example.com`
+      const res = await get(`/?${DOMAIN_QUERY_PARAM}=${next}`, {
+        headers: { cookie },
+      })
+      const setCookie = res.headers['set-cookie'][0]
+      cookie = setCookie.split(';').filter((x) => x.startsWith(DOMAIN_COOKIE_NAME))[0]
+      if (letter === 'a') {
+        // first
+        expect(cookie).toBe(`${DOMAIN_COOKIE_NAME}=a.example.com`)
+      } else if (letter === 'f') {
+        // last
+        expect(cookie.split('%2C').length).toBe(MAX_DOMAINS_SAVED)
+        expect(cookie.startsWith(`${DOMAIN_COOKIE_NAME}=f.example.com`)).toBe(true)
+      }
+    }
+  })
+
+  test('append with same as before', async () => {
+    const res = await get(`/?${DOMAIN_QUERY_PARAM}=Acme.example.com`, {
+      headers: {
+        cookie: `${DOMAIN_COOKIE_NAME}=acme.example.com`,
+      },
+    })
+    expect(res.statusCode).toBe(302)
+    const setCookie = res.headers['set-cookie'][0]
+    expect(setCookie).toMatch(/github_domains=acme.example.com;/)
+  })
+
+  test('trying to set multiple', async () => {
+    const res = await get(
+      `/?${DOMAIN_QUERY_PARAM}=a.example.com&${DOMAIN_QUERY_PARAM}=b.example.com`,
+    )
+    expect(res.statusCode).toBe(400)
+    expect(res.body).toMatch(/can only be one/)
+  })
+})