diff --git a/lib/constants.js b/lib/constants.js index 8a98f93edf..3bd3326198 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -1,3 +1,4 @@ export const ROOT = process.env.ROOT || '.' export const USER_LANGUAGE_COOKIE_NAME = 'user_language' export const TRANSLATIONS_ROOT = process.env.TRANSLATIONS_ROOT || 'translations' +export const MAX_REQUEST_TIMEOUT = parseInt(process.env.REQUEST_TIMEOUT || 10000, 10) diff --git a/middleware/timeout.js b/middleware/timeout.js index 3c1ecf4dda..522a8c39af 100644 --- a/middleware/timeout.js +++ b/middleware/timeout.js @@ -1,14 +1,12 @@ import timeout from 'express-timeout-handler' import statsd from '../lib/statsd.js' - -// Heroku router requests timeout after 30 seconds. We should stop them earlier! -const maxRequestTimeout = parseInt(process.env.REQUEST_TIMEOUT || 10000, 10) +import { MAX_REQUEST_TIMEOUT } from '../lib/constants.js' export default timeout.handler({ // Default timeout for all endpoints // To override for a given router/endpoint, use `xExpressTimeoutHandler.set(...)` - timeout: maxRequestTimeout, + timeout: MAX_REQUEST_TIMEOUT, // IMPORTANT: // We cannot allow the middleware to disable the `res` object's methods like diff --git a/package-lock.json b/package-lock.json index 2510c6d3b9..8f2e24a89c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -166,7 +166,7 @@ "sass": "^1.52.3", "start-server-and-test": "^2.0.0", "strip-ansi": "^7.0.1", - "typescript": "^4.7.3", + "typescript": "^5.0.2", "unist-util-remove": "^3.1.1", "unist-util-visit-parents": "^5.1.3" }, @@ -19361,15 +19361,16 @@ } }, "node_modules/typescript": { - "version": "4.7.4", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.2.tgz", + "integrity": "sha512-wVORMBGO/FAs/++blGNeAVdbNKtIh1rbBL2EyQ1+J9lClJ93KiiKe8PmFIVdXhHcyv44SL9oglmfeSsndo0jRw==", "dev": true, - "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { - "node": ">=4.2.0" + "node": ">=12.20" } }, "node_modules/unbox-primitive": { @@ -33217,7 +33218,9 @@ } }, "typescript": { - "version": "4.7.4", + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.2.tgz", + "integrity": "sha512-wVORMBGO/FAs/++blGNeAVdbNKtIh1rbBL2EyQ1+J9lClJ93KiiKe8PmFIVdXhHcyv44SL9oglmfeSsndo0jRw==", "dev": true }, "unbox-primitive": { diff --git a/package.json b/package.json index 4002a64465..1abb0a2e67 100644 --- a/package.json +++ b/package.json @@ -168,7 +168,7 @@ "sass": "^1.52.3", "start-server-and-test": "^2.0.0", "strip-ansi": "^7.0.1", - "typescript": "^4.7.3", + "typescript": "^5.0.2", "unist-util-remove": "^3.1.1", "unist-util-visit-parents": "^5.1.3" }, diff --git a/src/events/hydro.js b/src/events/hydro.js index 2ab2b5516e..413f9b0bd3 100644 --- a/src/events/hydro.js +++ b/src/events/hydro.js @@ -1,197 +1,68 @@ -import crypto from 'crypto' -import { Agent } from 'https' - +import { createHmac } from 'crypto' +import { Agent } from 'node:https' import got from 'got' - import statsd from '../../lib/statsd.js' -import FailBot from '../../lib/failbot.js' +import { report } from '../../lib/failbot.js' +import { MAX_REQUEST_TIMEOUT } from '../../lib/constants.js' const TIME_OUT_TEXT = 'ms has passed since batch creation' +const X_HYDRO_APP = 'docs-production' +const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster +const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT +const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal +const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U +const isProd = process.env.NODE_ENV === 'production' -const MOCK_HYDRO_POST = - process.env.NODE_ENV === 'test' || JSON.parse(process.env.MOCK_HYDRO_POST || 'false') - -// The point of all of this is to come in under 10 seconds. Because beyond -// 10 seconds the `middleware/timeout.js` will cancel the whole request. -// My capping the retry delay to max 2s, if Hydro is infinitely slow, -// what will happen here is: -// -// 1s + 0s + -// 1s + 1s + -// 1s + 2s + -// 1s + 2s = ~9s -// -// Which means it will try 4 times, (first + 3 retries) with a timeout -// of 1000ms each time. -// The hope is that a patient retry is more useful than fewer individually -// longer timeouts. -const calculateDelay = ({ computedValue }) => Math.min(computedValue, 2000) -const retryConfiguration = { limit: 3, calculateDelay } -const timeoutConfiguration = { response: 1000 } - -let _agent -function getHttpsAgent() { - if (!_agent) { - const agentOptions = { - // The most important option. This is false by default. - keepAlive: true, - // 32 because it's what's recommended here - // https://docs.microsoft.com/en-us/azure/app-service/app-service-web-nodejs-best-practices-and-troubleshoot-guide#my-node-application-is-making-excessive-outbound-calls - maxSockets: 32, - } - _agent = new Agent(agentOptions) +/* +`events` can be either like: + {schema, value} + or + [{schema, value}, {schema, value}, ...] +*/ +async function _publish( + events, + { secret, endpoint } = { + secret: process.env.HYDRO_SECRET, + endpoint: process.env.HYDRO_ENDPOINT, } - return _agent +) { + if (!secret || !endpoint) { + if (isProd) throw new Error('Configure Hydro') + return { statusCode: 200 } + } + + events = Array.isArray(events) ? events : [events] + const requestBody = JSON.stringify({ + events: events.map(({ schema, value }) => ({ + cluster: CLUSTER, + schema, + value: JSON.stringify(value), // We must double-encode the value property + })), + }) + const token = createHmac('sha256', secret).update(requestBody).digest('hex') + + const response = await got.post(endpoint, { + body: requestBody, + agent: { https: httpsAgent }, + headers: { + Authorization: `Hydro ${token}`, + 'Content-Type': 'application/json', + 'X-Hydro-App': X_HYDRO_APP, + }, + throwHttpErrors: false, + retry: { limit: RETRIES }, + timeout: { request: TIMEOUT }, + }) + const { statusCode, body } = response + + statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`]) + + // Track 3xx and 4xx in Sentry; 5xx is tracked separately from the Docs project + if (statusCode >= 300 && statusCode < 500 && !body.includes(TIME_OUT_TEXT)) { + report(new Error(`Failed to send event to Hydro (${statusCode})`), { statusCode, body }) + } + + return response } -export default class Hydro { - constructor({ secret, endpoint, forceDisableMock } = {}) { - // When mocking, the secret isn't important because nothing's actually - // password protected in terms of HTTP authorization. But, the - // secret is used for creating an HMAC payload so it has to be - // a string. - this.secret = secret || process.env.HYDRO_SECRET || (MOCK_HYDRO_POST && '') - this.endpoint = endpoint || process.env.HYDRO_ENDPOINT - // This class is involved in 2 types of jest tests: - // 1. end-to-end tests where jest talks to localhost:4000 (with NODE_ENV===test) - // 2. literal unit tests that might mock the socket stuff - // Because `MOCK_HYDRO_POST = process.env.NODE_ENV === 'test'` gets set - // for either type of jest tests, this additional setting makes it - // possible to override `process.env.NODE_ENV === 'test'` from - // mocking the HTTP calls. - this.forceDisableMock = forceDisableMock - } - - /** - * Can check if it can actually send to Hydro - */ - maySend() { - return Boolean(this.secret && this.endpoint) || MOCK_HYDRO_POST - } - - /** - * Generate a SHA256 hash of the payload using the secret - * to authenticate with Hydro - * @param {string} body - */ - generatePayloadHmac(body) { - return crypto.createHmac('sha256', this.secret).update(body).digest('hex') - } - - /** - * Publish a single event to Hydro - * @param {string} schema - * @param {any} value - */ - async publish(schema, value) { - const body = JSON.stringify({ - events: [ - { - schema, - value: JSON.stringify(value), // We must double-encode the value property - cluster: 'potomac', // We only have ability to publish externally to potomac cluster - }, - ], - }) - const token = this.generatePayloadHmac(body) - - const agent = getHttpsAgent() - - const doPost = async () => { - // We *could* exit early on this whole `publish()` method if we know - // we're going to "mock" Hydro anyway, but injecting here, before - // the actual network operation, we make most of this method's code - // execute without actually depending on real network. This is - // good for any functional tests that depend on this, e.g. jest tests. - if (MOCK_HYDRO_POST && !this.forceDisableMock) { - return { statusCode: 200 } - } - return got(this.endpoint, { - method: 'POST', - body, - headers: { - Authorization: `Hydro ${token}`, - 'Content-Type': 'application/json', - 'X-Hydro-App': 'docs-production', - }, - // Because we prefer to handle the status code manually below - throwHttpErrors: false, - agent: { - // Deliberately not setting up a `http` or `http2` agent - // because it won't be used for this particular `got` request. - https: agent, - }, - // See above, where these are configured for the explanation - retry: retryConfiguration, - timeout: timeoutConfiguration, - }) - } - - const res = await statsd.asyncTimer(doPost, 'hydro.response_time')() - - const statTags = [`response_code:${res.statusCode}`] - statsd.increment(`hydro.response_code.${res.statusCode}`, 1, statTags) - statsd.increment('hydro.response_code.all', 1, statTags) - - // Track hydro exceptions in Sentry, - // but don't track 5xx because we can't do anything about service availability - if (res.statusCode !== 200 && res.statusCode < 500) { - const hydroText = await res.text() - const err = new Error( - `Hydro request failed: (${res.statusCode}) ${res.statusMessage} - ${hydroText}` - ) - err.status = res.statusCode - - // If the Hydro request failed as an "Unprocessable Entity": - // - If it was a timeout, don't log it to Sentry - // - If not, log it to Sentry for diagnostics - const hydroFailures = [] - if (res.statusCode === 422) { - let failureResponse - try { - failureResponse = JSON.parse(hydroText) - } catch (error) { - // Not JSON... ignore it - } - - if (failureResponse) { - const { failures } = failureResponse - if (Array.isArray(failures) && failures.length > 0) { - // IMPORTANT: Although these timeouts typically contain a `retriable: true` property, - // our discussions with the Hydro team left us deciding we did NOT want to retry - // sending them. The timeout response does NOT guarantee that the original message - // failed to make it through. As such, if we resend, we may create duplicate events; - // if we don't, we may drop events. - - // Find the timeouts, if any - const timeoutFailures = failures.filter(({ error }) => error.includes(TIME_OUT_TEXT)) - - // If there were ONLY timeouts, throw the error to avoid logging to Sentry - if (timeoutFailures.length === failures.length) { - err.message = `Hydro timed out: ${failures}` - err.status = 503 // Give it a more accurate error code - throw err - } - - // Compile list of the other failures for logging - hydroFailures.push(...failures.filter(({ error }) => !error.includes(TIME_OUT_TEXT))) - } - } - - console.error( - `Hydro schema validation failed:\n - Request: ${body}\n - Failure: (${res.statusCode}) ${hydroText}` - ) - } - - FailBot.report(err, { - hydroStatus: res.statusCode, - hydroText, - hydroFailures, - }) - - throw err - } - - return res - } -} +export const publish = statsd.asyncTimer(_publish, 'hydro.response_time') diff --git a/src/events/middleware.js b/src/events/middleware.js index 1222f91847..e217f34c69 100644 --- a/src/events/middleware.js +++ b/src/events/middleware.js @@ -5,9 +5,8 @@ import addFormats from 'ajv-formats' import { schemas, hydroNames } from './schema.js' import catchMiddlewareError from '../../middleware/catch-middleware-error.js' import { noCacheControl } from '../../middleware/cache-control.js' -import Hydro from './hydro.js' +import { publish } from './hydro.js' -const hydro = new Hydro() const router = express.Router() const ajv = new Ajv() addFormats(ajv) @@ -33,12 +32,13 @@ router.post( } res.json({}) - if (hydro.maySend()) { - try { - await hydro.publish(hydroNames[req.body.type], omit(req.body, OMIT_FIELDS)) - } catch (err) { - console.error('Failed to submit event to Hydro', err) - } + try { + await publish({ + schema: hydroNames[type], + value: omit(req.body, OMIT_FIELDS), + }) + } catch (err) { + console.error('Failed to submit event to Hydro', err) } }) ) diff --git a/src/events/tests/hydro.js b/src/events/tests/hydro.js index b679f6a61a..ba10e35271 100644 --- a/src/events/tests/hydro.js +++ b/src/events/tests/hydro.js @@ -1,67 +1,68 @@ import { afterEach } from '@jest/globals' import nock from 'nock' -import Hydro from '../hydro.js' +import { publish } from '../hydro.js' -describe('hydro', () => { - let hydro, params +describe('Hydro', () => { + const secret = '3BD22A91' + const endpoint = 'http://example.com' + const config = { secret, endpoint } - beforeEach(() => { - hydro = new Hydro({ - secret: '123', - endpoint: 'https://real-hydro.com', - // When jest tests run, `NODE_ENV==='test'` so the actualy `got()` - // calls inside the Hydro class would be prevented. - // Setting this to true will prevent that second-layer protection. - forceDisableMock: true, - }) + afterEach(() => { + nock.cleanAll() + }) - nock(hydro.endpoint, { + it('publishes a single event', async () => { + const scope = nock(endpoint, { reqheaders: { - Authorization: /^Hydro [\d\w]{64}$/, + Authorization: /^Hydro \w{64}$/, 'Content-Type': 'application/json', 'X-Hydro-App': 'docs-production', }, }) - // Respond with a 200 and store the body we sent - .post('/') - .reply(200, (_, body) => { - params = body - }) - }) - - afterEach(() => { - // Gotta always clean up after activating `nock`. - nock.cleanAll() - }) - - describe('#publish', () => { - it('publishes a single event to Hydro', async () => { - await hydro.publish('event-name', { pizza: true }) - expect(params).toEqual({ + .post('/', { events: [ { - schema: 'event-name', - value: JSON.stringify({ pizza: true }), + schema: 'docs.v0.ExampleEvent', + value: JSON.stringify({ event_id: 'FA36EA6D' }), cluster: 'potomac', }, ], }) - }) + .reply(200) + await publish({ schema: 'docs.v0.ExampleEvent', value: { event_id: 'FA36EA6D' } }, config) + expect(scope.isDone()).toBeTruthy() }) - describe('#generatePayloadHmac', () => { - it('returns a SHA256 HMAC string', () => { - const body = JSON.stringify({ pizza: true }) - const hash = hydro.generatePayloadHmac(body) - expect(hash).toEqual(expect.any(String)) - expect(hash).toHaveLength(64) - }) - - it('generates the same string for the same payload', () => { - const body = JSON.stringify({ pizza: true }) - const one = hydro.generatePayloadHmac(body) - const two = hydro.generatePayloadHmac(body) - expect(one).toBe(two) + it('publishes many events in one request', async () => { + const scope = nock(endpoint, { + reqheaders: { + Authorization: /^Hydro \w{64}$/, + 'Content-Type': 'application/json', + 'X-Hydro-App': 'docs-production', + }, }) + .post('/', { + events: [ + { + schema: 'docs.v0.ExampleEvent', + value: JSON.stringify({ event_id: 'FA36EA6D' }), + cluster: 'potomac', + }, + { + schema: 'docs.v0.ExampleEvent', + value: JSON.stringify({ event_id: '4F60C35A' }), + cluster: 'potomac', + }, + ], + }) + .reply(200) + await publish( + [ + { schema: 'docs.v0.ExampleEvent', value: { event_id: 'FA36EA6D' } }, + { schema: 'docs.v0.ExampleEvent', value: { event_id: '4F60C35A' } }, + ], + config + ) + expect(scope.isDone()).toBeTruthy() }) })