1
0
mirror of synced 2025-12-25 02:17:36 -05:00

Merge pull request #24611 from github/repo-sync

repo sync
This commit is contained in:
Octomerger Bot
2023-03-22 12:36:49 -04:00
committed by GitHub
7 changed files with 127 additions and 253 deletions

View File

@@ -1,3 +1,4 @@
export const ROOT = process.env.ROOT || '.'
export const USER_LANGUAGE_COOKIE_NAME = 'user_language'
export const TRANSLATIONS_ROOT = process.env.TRANSLATIONS_ROOT || 'translations'
export const MAX_REQUEST_TIMEOUT = parseInt(process.env.REQUEST_TIMEOUT || 10000, 10)

View File

@@ -1,14 +1,12 @@
import timeout from 'express-timeout-handler'
import statsd from '../lib/statsd.js'
// Heroku router requests timeout after 30 seconds. We should stop them earlier!
const maxRequestTimeout = parseInt(process.env.REQUEST_TIMEOUT || 10000, 10)
import { MAX_REQUEST_TIMEOUT } from '../lib/constants.js'
export default timeout.handler({
// Default timeout for all endpoints
// To override for a given router/endpoint, use `xExpressTimeoutHandler.set(...)`
timeout: maxRequestTimeout,
timeout: MAX_REQUEST_TIMEOUT,
// IMPORTANT:
// We cannot allow the middleware to disable the `res` object's methods like

13
package-lock.json generated
View File

@@ -166,7 +166,7 @@
"sass": "^1.52.3",
"start-server-and-test": "^2.0.0",
"strip-ansi": "^7.0.1",
"typescript": "^4.7.3",
"typescript": "^5.0.2",
"unist-util-remove": "^3.1.1",
"unist-util-visit-parents": "^5.1.3"
},
@@ -19361,15 +19361,16 @@
}
},
"node_modules/typescript": {
"version": "4.7.4",
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.2.tgz",
"integrity": "sha512-wVORMBGO/FAs/++blGNeAVdbNKtIh1rbBL2EyQ1+J9lClJ93KiiKe8PmFIVdXhHcyv44SL9oglmfeSsndo0jRw==",
"dev": true,
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=4.2.0"
"node": ">=12.20"
}
},
"node_modules/unbox-primitive": {
@@ -33217,7 +33218,9 @@
}
},
"typescript": {
"version": "4.7.4",
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.0.2.tgz",
"integrity": "sha512-wVORMBGO/FAs/++blGNeAVdbNKtIh1rbBL2EyQ1+J9lClJ93KiiKe8PmFIVdXhHcyv44SL9oglmfeSsndo0jRw==",
"dev": true
},
"unbox-primitive": {

View File

@@ -168,7 +168,7 @@
"sass": "^1.52.3",
"start-server-and-test": "^2.0.0",
"strip-ansi": "^7.0.1",
"typescript": "^4.7.3",
"typescript": "^5.0.2",
"unist-util-remove": "^3.1.1",
"unist-util-visit-parents": "^5.1.3"
},

View File

@@ -1,197 +1,68 @@
import crypto from 'crypto'
import { Agent } from 'https'
import { createHmac } from 'crypto'
import { Agent } from 'node:https'
import got from 'got'
import statsd from '../../lib/statsd.js'
import FailBot from '../../lib/failbot.js'
import { report } from '../../lib/failbot.js'
import { MAX_REQUEST_TIMEOUT } from '../../lib/constants.js'
const TIME_OUT_TEXT = 'ms has passed since batch creation'
const X_HYDRO_APP = 'docs-production'
const CLUSTER = 'potomac' // We only have ability to publish externally to potomac cluster
const TIMEOUT = MAX_REQUEST_TIMEOUT - 1000 // Limit because Express will terminate at MAX_REQUEST_TIMEOUT
const RETRIES = 0 // We care about aggregate statistics; a few dropped events isn't a big deal
const httpsAgent = new Agent({ keepAlive: true, maxSockets: 32 }) // keepAlive: https://gh.io/AAk2qio -- 32: https://bit.ly/3Tywd1U
const isProd = process.env.NODE_ENV === 'production'
const MOCK_HYDRO_POST =
process.env.NODE_ENV === 'test' || JSON.parse(process.env.MOCK_HYDRO_POST || 'false')
// The point of all of this is to come in under 10 seconds. Because beyond
// 10 seconds the `middleware/timeout.js` will cancel the whole request.
// My capping the retry delay to max 2s, if Hydro is infinitely slow,
// what will happen here is:
//
// 1s + 0s +
// 1s + 1s +
// 1s + 2s +
// 1s + 2s = ~9s
//
// Which means it will try 4 times, (first + 3 retries) with a timeout
// of 1000ms each time.
// The hope is that a patient retry is more useful than fewer individually
// longer timeouts.
const calculateDelay = ({ computedValue }) => Math.min(computedValue, 2000)
const retryConfiguration = { limit: 3, calculateDelay }
const timeoutConfiguration = { response: 1000 }
let _agent
function getHttpsAgent() {
if (!_agent) {
const agentOptions = {
// The most important option. This is false by default.
keepAlive: true,
// 32 because it's what's recommended here
// https://docs.microsoft.com/en-us/azure/app-service/app-service-web-nodejs-best-practices-and-troubleshoot-guide#my-node-application-is-making-excessive-outbound-calls
maxSockets: 32,
}
_agent = new Agent(agentOptions)
/*
`events` can be either like:
{schema, value}
or
[{schema, value}, {schema, value}, ...]
*/
async function _publish(
events,
{ secret, endpoint } = {
secret: process.env.HYDRO_SECRET,
endpoint: process.env.HYDRO_ENDPOINT,
}
return _agent
) {
if (!secret || !endpoint) {
if (isProd) throw new Error('Configure Hydro')
return { statusCode: 200 }
}
events = Array.isArray(events) ? events : [events]
const requestBody = JSON.stringify({
events: events.map(({ schema, value }) => ({
cluster: CLUSTER,
schema,
value: JSON.stringify(value), // We must double-encode the value property
})),
})
const token = createHmac('sha256', secret).update(requestBody).digest('hex')
const response = await got.post(endpoint, {
body: requestBody,
agent: { https: httpsAgent },
headers: {
Authorization: `Hydro ${token}`,
'Content-Type': 'application/json',
'X-Hydro-App': X_HYDRO_APP,
},
throwHttpErrors: false,
retry: { limit: RETRIES },
timeout: { request: TIMEOUT },
})
const { statusCode, body } = response
statsd.increment('hydro.response_code.all', 1, [`response_code:${statusCode}`])
// Track 3xx and 4xx in Sentry; 5xx is tracked separately from the Docs project
if (statusCode >= 300 && statusCode < 500 && !body.includes(TIME_OUT_TEXT)) {
report(new Error(`Failed to send event to Hydro (${statusCode})`), { statusCode, body })
}
return response
}
export default class Hydro {
constructor({ secret, endpoint, forceDisableMock } = {}) {
// When mocking, the secret isn't important because nothing's actually
// password protected in terms of HTTP authorization. But, the
// secret is used for creating an HMAC payload so it has to be
// a string.
this.secret = secret || process.env.HYDRO_SECRET || (MOCK_HYDRO_POST && '')
this.endpoint = endpoint || process.env.HYDRO_ENDPOINT
// This class is involved in 2 types of jest tests:
// 1. end-to-end tests where jest talks to localhost:4000 (with NODE_ENV===test)
// 2. literal unit tests that might mock the socket stuff
// Because `MOCK_HYDRO_POST = process.env.NODE_ENV === 'test'` gets set
// for either type of jest tests, this additional setting makes it
// possible to override `process.env.NODE_ENV === 'test'` from
// mocking the HTTP calls.
this.forceDisableMock = forceDisableMock
}
/**
* Can check if it can actually send to Hydro
*/
maySend() {
return Boolean(this.secret && this.endpoint) || MOCK_HYDRO_POST
}
/**
* Generate a SHA256 hash of the payload using the secret
* to authenticate with Hydro
* @param {string} body
*/
generatePayloadHmac(body) {
return crypto.createHmac('sha256', this.secret).update(body).digest('hex')
}
/**
* Publish a single event to Hydro
* @param {string} schema
* @param {any} value
*/
async publish(schema, value) {
const body = JSON.stringify({
events: [
{
schema,
value: JSON.stringify(value), // We must double-encode the value property
cluster: 'potomac', // We only have ability to publish externally to potomac cluster
},
],
})
const token = this.generatePayloadHmac(body)
const agent = getHttpsAgent()
const doPost = async () => {
// We *could* exit early on this whole `publish()` method if we know
// we're going to "mock" Hydro anyway, but injecting here, before
// the actual network operation, we make most of this method's code
// execute without actually depending on real network. This is
// good for any functional tests that depend on this, e.g. jest tests.
if (MOCK_HYDRO_POST && !this.forceDisableMock) {
return { statusCode: 200 }
}
return got(this.endpoint, {
method: 'POST',
body,
headers: {
Authorization: `Hydro ${token}`,
'Content-Type': 'application/json',
'X-Hydro-App': 'docs-production',
},
// Because we prefer to handle the status code manually below
throwHttpErrors: false,
agent: {
// Deliberately not setting up a `http` or `http2` agent
// because it won't be used for this particular `got` request.
https: agent,
},
// See above, where these are configured for the explanation
retry: retryConfiguration,
timeout: timeoutConfiguration,
})
}
const res = await statsd.asyncTimer(doPost, 'hydro.response_time')()
const statTags = [`response_code:${res.statusCode}`]
statsd.increment(`hydro.response_code.${res.statusCode}`, 1, statTags)
statsd.increment('hydro.response_code.all', 1, statTags)
// Track hydro exceptions in Sentry,
// but don't track 5xx because we can't do anything about service availability
if (res.statusCode !== 200 && res.statusCode < 500) {
const hydroText = await res.text()
const err = new Error(
`Hydro request failed: (${res.statusCode}) ${res.statusMessage} - ${hydroText}`
)
err.status = res.statusCode
// If the Hydro request failed as an "Unprocessable Entity":
// - If it was a timeout, don't log it to Sentry
// - If not, log it to Sentry for diagnostics
const hydroFailures = []
if (res.statusCode === 422) {
let failureResponse
try {
failureResponse = JSON.parse(hydroText)
} catch (error) {
// Not JSON... ignore it
}
if (failureResponse) {
const { failures } = failureResponse
if (Array.isArray(failures) && failures.length > 0) {
// IMPORTANT: Although these timeouts typically contain a `retriable: true` property,
// our discussions with the Hydro team left us deciding we did NOT want to retry
// sending them. The timeout response does NOT guarantee that the original message
// failed to make it through. As such, if we resend, we may create duplicate events;
// if we don't, we may drop events.
// Find the timeouts, if any
const timeoutFailures = failures.filter(({ error }) => error.includes(TIME_OUT_TEXT))
// If there were ONLY timeouts, throw the error to avoid logging to Sentry
if (timeoutFailures.length === failures.length) {
err.message = `Hydro timed out: ${failures}`
err.status = 503 // Give it a more accurate error code
throw err
}
// Compile list of the other failures for logging
hydroFailures.push(...failures.filter(({ error }) => !error.includes(TIME_OUT_TEXT)))
}
}
console.error(
`Hydro schema validation failed:\n - Request: ${body}\n - Failure: (${res.statusCode}) ${hydroText}`
)
}
FailBot.report(err, {
hydroStatus: res.statusCode,
hydroText,
hydroFailures,
})
throw err
}
return res
}
}
export const publish = statsd.asyncTimer(_publish, 'hydro.response_time')

View File

@@ -5,9 +5,8 @@ import addFormats from 'ajv-formats'
import { schemas, hydroNames } from './schema.js'
import catchMiddlewareError from '../../middleware/catch-middleware-error.js'
import { noCacheControl } from '../../middleware/cache-control.js'
import Hydro from './hydro.js'
import { publish } from './hydro.js'
const hydro = new Hydro()
const router = express.Router()
const ajv = new Ajv()
addFormats(ajv)
@@ -33,12 +32,13 @@ router.post(
}
res.json({})
if (hydro.maySend()) {
try {
await hydro.publish(hydroNames[req.body.type], omit(req.body, OMIT_FIELDS))
} catch (err) {
console.error('Failed to submit event to Hydro', err)
}
try {
await publish({
schema: hydroNames[type],
value: omit(req.body, OMIT_FIELDS),
})
} catch (err) {
console.error('Failed to submit event to Hydro', err)
}
})
)

View File

@@ -1,67 +1,68 @@
import { afterEach } from '@jest/globals'
import nock from 'nock'
import Hydro from '../hydro.js'
import { publish } from '../hydro.js'
describe('hydro', () => {
let hydro, params
describe('Hydro', () => {
const secret = '3BD22A91'
const endpoint = 'http://example.com'
const config = { secret, endpoint }
beforeEach(() => {
hydro = new Hydro({
secret: '123',
endpoint: 'https://real-hydro.com',
// When jest tests run, `NODE_ENV==='test'` so the actualy `got()`
// calls inside the Hydro class would be prevented.
// Setting this to true will prevent that second-layer protection.
forceDisableMock: true,
})
afterEach(() => {
nock.cleanAll()
})
nock(hydro.endpoint, {
it('publishes a single event', async () => {
const scope = nock(endpoint, {
reqheaders: {
Authorization: /^Hydro [\d\w]{64}$/,
Authorization: /^Hydro \w{64}$/,
'Content-Type': 'application/json',
'X-Hydro-App': 'docs-production',
},
})
// Respond with a 200 and store the body we sent
.post('/')
.reply(200, (_, body) => {
params = body
})
})
afterEach(() => {
// Gotta always clean up after activating `nock`.
nock.cleanAll()
})
describe('#publish', () => {
it('publishes a single event to Hydro', async () => {
await hydro.publish('event-name', { pizza: true })
expect(params).toEqual({
.post('/', {
events: [
{
schema: 'event-name',
value: JSON.stringify({ pizza: true }),
schema: 'docs.v0.ExampleEvent',
value: JSON.stringify({ event_id: 'FA36EA6D' }),
cluster: 'potomac',
},
],
})
})
.reply(200)
await publish({ schema: 'docs.v0.ExampleEvent', value: { event_id: 'FA36EA6D' } }, config)
expect(scope.isDone()).toBeTruthy()
})
describe('#generatePayloadHmac', () => {
it('returns a SHA256 HMAC string', () => {
const body = JSON.stringify({ pizza: true })
const hash = hydro.generatePayloadHmac(body)
expect(hash).toEqual(expect.any(String))
expect(hash).toHaveLength(64)
})
it('generates the same string for the same payload', () => {
const body = JSON.stringify({ pizza: true })
const one = hydro.generatePayloadHmac(body)
const two = hydro.generatePayloadHmac(body)
expect(one).toBe(two)
it('publishes many events in one request', async () => {
const scope = nock(endpoint, {
reqheaders: {
Authorization: /^Hydro \w{64}$/,
'Content-Type': 'application/json',
'X-Hydro-App': 'docs-production',
},
})
.post('/', {
events: [
{
schema: 'docs.v0.ExampleEvent',
value: JSON.stringify({ event_id: 'FA36EA6D' }),
cluster: 'potomac',
},
{
schema: 'docs.v0.ExampleEvent',
value: JSON.stringify({ event_id: '4F60C35A' }),
cluster: 'potomac',
},
],
})
.reply(200)
await publish(
[
{ schema: 'docs.v0.ExampleEvent', value: { event_id: 'FA36EA6D' } },
{ schema: 'docs.v0.ExampleEvent', value: { event_id: '4F60C35A' } },
],
config
)
expect(scope.isDone()).toBeTruthy()
})
})