92 lines
3.2 KiB
JavaScript
92 lines
3.2 KiB
JavaScript
import path from 'path'
|
|
import fs from 'fs'
|
|
import crypto from 'crypto'
|
|
|
|
import got from 'got'
|
|
import statsd from '#src/observability/lib/statsd.js'
|
|
|
|
// The only reason this is exported is for the sake of the unit tests'
|
|
// ability to test in-memory miss after purging this with a mutation
|
|
// In-memory cache: maps a URL (the cache key) to its parsed JSON payload.
export const cache = new Map()
|
|
|
|
// True when NODE_ENV is 'production'; in that case downloaded responses are never written to disk.
const inProd = process.env.NODE_ENV === 'production'
|
|
|
|
// Wrapper on `got()` that is able to both cache in memory and on disk.
|
|
// The on-disk cache lives in `.remotejson-cache/` unless GET_REMOTE_JSON_DISK_CACHE_ROOT is set.
|
|
// We use this for downloading `redirects.json` files from the
|
|
// help-docs-archived-enterprise-versions repo as a proxy. A lot of those
|
|
// .json files are large and they're also static which makes them
|
|
// ideal for caching.
|
|
// Note that there are 2 layers of caching here:
|
|
// 1. Is it in memory cache?
|
|
// 2. No, is it on disk?
|
|
// 3. No, download from the internet then store responses in memory and disk
|
|
/**
 * Fetch a JSON document from `url`, caching the parsed result in memory and,
 * outside of production, the raw response body on disk.
 *
 * Lookup order: the in-memory `cache`, then the on-disk cache file, then the
 * network via `got()`. Every call increments the
 * `middleware.get_remote_json` statsd counter, tagged with the URL and where
 * the value came from (`memory`, `disk` or `not`).
 *
 * @param {string} url - Address of the JSON document. Also used as the cache key.
 * @param {object} [config] - Options forwarded to `got()`; only used on a cache miss.
 * @returns {Promise<*>} The parsed JSON payload.
 * @throws {Error} If the response has no `content-type` header or one that
 *   does not start with `application/json`. Errors from `got()`, from
 *   `JSON.parse()` on the downloaded body, and non-ENOENT file system errors
 *   are propagated unchanged.
 */
export default async function getRemoteJSON(url, config) {
  // We could get fancy and make the cache key depend on the `config` too,
  // but A) this is only used for archived enterprise stuff,
  // and B) the config is only applicable on cache miss when doing the `got()`.
  const cacheKey = url

  // Assume it's in the in-memory cache first.
  // Later we'll update this if we find we need to.
  let fromCache = 'memory'

  if (!cache.has(cacheKey)) {
    fromCache = 'not'

    let foundOnDisk = false
    const tempFilename = crypto.createHash('md5').update(url).digest('hex')

    // Do this here instead of at the top of the file so that it becomes
    // possible to override this in unit tests.
    const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'

    const onDisk = path.join(ROOT, `${tempFilename}.json`)

    try {
      const body = fs.readFileSync(onDisk, 'utf-8')
      // It might exist on disk, but it could be empty
      if (body) {
        try {
          // It might be corrupted JSON.
          cache.set(cacheKey, JSON.parse(body))
          fromCache = 'disk'
          foundOnDisk = true
        } catch (error) {
          // Corrupted JSON on disk is treated as a miss; we fall through to
          // the download below. Anything else is unexpected.
          if (!(error instanceof SyntaxError)) {
            throw error
          }
        }
      }
    } catch (error) {
      // A missing cache file is the normal "not cached yet" case.
      if (error.code !== 'ENOENT') {
        throw error
      }
    }

    if (!foundOnDisk) {
      // got will, by default, follow redirects and it will throw if the ultimate
      // response is not a 2xx.
      // But it's possible that the page is a 200 OK but it's just not a JSON
      // page at all. Then we can't assume we can deserialize it.
      const res = await got(url, config)
      // The header can be absent altogether, so guard before calling
      // `.startsWith()` to avoid an opaque TypeError.
      const contentType = res.headers['content-type']
      if (typeof contentType !== 'string' || !contentType.startsWith('application/json')) {
        throw new Error(`Fetching '${url}' resulted in a non-JSON response (${contentType})`)
      }
      cache.set(cacheKey, JSON.parse(res.body))

      // Only write to disk for testing and local preview.
      // In production, we never write to disk. Only in-memory.
      if (!inProd) {
        fs.mkdirSync(path.dirname(onDisk), { recursive: true })
        fs.writeFileSync(onDisk, res.body, 'utf-8')
      }
    }
  }

  const tags = [`url:${url}`, `from_cache:${fromCache}`]
  statsd.increment('middleware.get_remote_json', 1, tags)
  return cache.get(cacheKey)
}
|