diff --git a/.github/workflows/prod-build-deploy.yml b/.github/workflows/prod-build-deploy.yml
new file mode 100644
index 0000000000..f5837cc689
--- /dev/null
+++ b/.github/workflows/prod-build-deploy.yml
@@ -0,0 +1,231 @@
+name: Production - Build and Deploy
+
+# **What it does**: Builds and deploys the default branch to production
+# **Why we have it**: To enable us to deploy the latest to production whenever necessary rather than relying on PR merges.
+# **Who does it impact**: All contributors.
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  build-and-deploy:
+    if: ${{ github.repository == 'github/docs-internal'}}
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+    concurrency:
+      group: prod_deploy
+      cancel-in-progress: true
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@5a4ac9002d0be2fb38bd78e4b4dbde5606d7042f
+        with:
+          persist-credentials: 'false'
+
+      - name: Setup node
+        uses: actions/setup-node@38d90ce44d5275ad62cc48384b3d8a58c500bb5f
+        with:
+          node-version: 16.8.x
+          cache: npm
+
+      # Required for `npm pkg ...` command support
+      - name: Update to npm@^7.20.0
+        run: npm install --global npm@^7.20.0
+
+      - name: Install dependencies
+        run: npm ci
+
+      - name: Clone early access
+        run: node script/early-access/clone-for-build.js
+        env:
+          DOCUBOT_REPO_PAT: ${{ secrets.DOCUBOT_REPO_PAT }}
+          GIT_BRANCH: main
+
+      - name: Build
+        run: npm run build
+
+      - name: Remove development-only dependencies
+        run: npm prune --production
+
+      - name: Remove all npm scripts
+        run: npm pkg delete scripts
+
+      - name: Set npm script for Heroku build to noop
+        run: npm set-script heroku-postbuild "echo 'Application was pre-built!'"
+
+      - name: Create a gzipped archive
+        run: |
+          tar -cz --file=app.tar.gz \
+            node_modules/ \
+            .next/ \
+            assets/ \
+            content/ \
+            data/ \
+            includes/ \
+            lib/ \
+            middleware/ \
+            translations/ \
+            server.mjs \
+            package*.json \
+            .npmrc \
+            feature-flags.json \
+            next.config.js \
+            app.json \
+            Procfile
+
+      - name: Install the development dependencies again
+        run: npm install
+
+      - name: Create a Heroku build source
+        id: build-source
+        uses: actions/github-script@2b34a689ec86a68d8ab9478298f91d5401337b7d
+        env:
+          HEROKU_API_TOKEN: ${{ secrets.HEROKU_API_TOKEN }}
+        with:
+          script: |
+            const { owner, repo } = context.repo
+
+            if (owner !== 'github') {
+              throw new Error(`Repository owner must be 'github' but was: ${owner}`)
+            }
+            if (repo !== 'docs-internal') {
+              throw new Error(`Repository name must be 'docs-internal' but was: ${repo}`)
+            }
+
+            const Heroku = require('heroku-client')
+            const heroku = new Heroku({ token: process.env.HEROKU_API_TOKEN })
+
+            const { source_blob: sourceBlob } = await heroku.post('/sources')
+            const { put_url: uploadUrl, get_url: downloadUrl } = sourceBlob
+
+            core.setOutput('upload_url', uploadUrl)
+            core.setOutput('download_url', downloadUrl)
+
+      # See: https://devcenter.heroku.com/articles/build-and-release-using-the-api#sources-endpoint
+      - name: Upload to the Heroku build source
+        env:
+          UPLOAD_URL: ${{ steps.build-source.outputs.upload_url }}
+        run: |
+          curl --fail "$UPLOAD_URL" \
+            -X PUT \
+            -H 'Content-Type:' \
+            --data-binary @app.tar.gz
+
+      - name: Install one-off development-only dependencies
+        run: npm install --no-save --include=optional esm
+
+      - name: Deploy
+        id: deploy
+        uses: actions/github-script@2b34a689ec86a68d8ab9478298f91d5401337b7d
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HEROKU_API_TOKEN: ${{ secrets.HEROKU_API_TOKEN }}
+          HYDRO_ENDPOINT: ${{ secrets.HYDRO_ENDPOINT }}
+          HYDRO_SECRET: ${{ secrets.HYDRO_SECRET }}
+          SOURCE_BLOB_URL: ${{ steps.build-source.outputs.download_url }}
+        with:
+          script: |
+            const { GITHUB_TOKEN, HEROKU_API_TOKEN, SOURCE_BLOB_URL } = process.env
+
+            // Exit if GitHub Actions PAT is not found
+            if (!GITHUB_TOKEN) {
+              throw new Error('You must supply a GITHUB_TOKEN environment variable!')
+            }
+
+            // Exit if Heroku API token is not found
+            if (!HEROKU_API_TOKEN) {
+              throw new Error('You must supply a HEROKU_API_TOKEN environment variable!')
+            }
+
+            // Workaround to allow us to load ESM files with `require(...)`
+            const esm = require('esm')
+            require = esm({})
+
+            const { default: getOctokit } = require('./script/helpers/github')
+            const { default: deployToProduction } = require('./script/deployment/deploy-to-production')
+
+            // This helper uses the `GITHUB_TOKEN` implicitly!
+            // We're using our usual version of Octokit vs. the provided `github`
+            // instance to avoid versioning discrepancies.
+            const octokit = getOctokit()
+
+            try {
+              await deployToProduction({
+                octokit,
+                // These parameters will ONLY be set by Actions
+                sourceBlobUrl: SOURCE_BLOB_URL,
+                runId: context.runId
+              })
+            } catch (error) {
+              console.error(`Failed to deploy to production: ${error.message}`)
+              console.error(error)
+              throw error
+            }
+
+      - name: Mark the deployment as inactive if timed out
+        uses: actions/github-script@2b34a689ec86a68d8ab9478298f91d5401337b7d
+        # A status check function is required here; otherwise an implicit `success()`
+        # is applied and this step would never run after the deploy step is cancelled.
+        if: ${{ always() && steps.deploy.outcome == 'cancelled' }}
+        env:
+          DEPLOYMENT_ID: ${{ steps.deploy.outputs.deploymentId }}
+          LOG_URL: ${{ steps.deploy.outputs.logUrl }}
+        with:
+          script: |
+            const { DEPLOYMENT_ID, LOG_URL } = process.env
+            const { owner, repo } = context.repo
+
+            if (!DEPLOYMENT_ID) {
+              throw new Error("A deployment wasn't created before a timeout occurred!")
+            }
+
+            await github.repos.createDeploymentStatus({
+              owner,
+              repo,
+              deployment_id: DEPLOYMENT_ID,
+              state: 'error',
+              description: 'The deployment step timed out. See workflow logs.',
+              log_url: LOG_URL,
+              // The 'ant-man' preview is required for `state` values of 'inactive', as well as
+              // the use of the `log_url`, `environment_url`, and `auto_inactive` parameters.
+              // The 'flash' preview is required for `state` values of 'in_progress' and 'queued'.
+              mediaType: {
+                previews: ['ant-man', 'flash'],
+              },
+            })
+            console.log('⏲️ Deployment status: error - The deployment timed out...')
+
+      - name: Purge Fastly edge cache
+        uses: actions/github-script@2b34a689ec86a68d8ab9478298f91d5401337b7d
+        env:
+          FASTLY_TOKEN: ${{ secrets.FASTLY_TOKEN }}
+          FASTLY_SERVICE_ID: ${{ secrets.FASTLY_SERVICE_ID }}
+          FASTLY_SURROGATE_KEY: 'all-the-things'
+          DELAY_FOR_PREBOOT: 'true'
+        with:
+          script: |
+            // Workaround to allow us to load ESM files with `require(...)`
+            const esm = require('esm')
+            require = esm({})
+
+            const { default: purgeEdgeCache } = require('./script/deployment/purge-edge-cache')
+
+            try {
+              await purgeEdgeCache({
+                includeDelayForPreboot: process.env.DELAY_FOR_PREBOOT !== 'false'
+              })
+            } catch (error) {
+              console.error(`Failed to purge the edge cache: ${error.message}`)
+              console.error(error)
+              throw error
+            }
+
+      - name: Send Slack notification if workflow fails
+        uses: someimportantcompany/github-actions-slack-message@0b470c14b39da4260ed9e3f9a4f1298a74ccdefd
+        if: ${{ failure() }}
+        with:
+          channel: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
+          bot-token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
+          color: failure
+          text: Production deployment failed at commit ${{ github.sha }}. See https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
diff --git a/script/deployment/deploy-to-production.js b/script/deployment/deploy-to-production.js
new file mode 100755
index 0000000000..0f45ebeca7
--- /dev/null
+++ b/script/deployment/deploy-to-production.js
@@ -0,0 +1,440 @@
+#!/usr/bin/env node
+import sleep from 'await-sleep'
+import got from 'got'
+import Heroku from 'heroku-client'
+import { setOutput } from '@actions/core'
+
+const SLEEP_INTERVAL = 5000
+const HEROKU_LOG_LINES_TO_SHOW = 25
+
+// Allow for a few 404 (Not Found) or 429 (Too Many Requests) responses from the
+// semi-unreliable Heroku API when we're polling for status updates
+const ALLOWED_MISSING_RESPONSE_COUNT = 5
+
+/**
+ * Deploy the latest commit on the `main` branch of `github/docs-internal` to the
+ * production Heroku app and track the result as a GitHub Deployment.
+ *
+ * The deployment is created on GitHub, the Heroku config vars are updated, a Heroku
+ * build is created from the source blob, and then the build, its release, and the new
+ * dynos are polled before a final `success` or `error` deployment status is reported.
+ * If the latest commit cannot be fetched, the process exits with code 1.
+ *
+ * @param {object} options
+ * @param {object} options.octokit - Octokit client used for all GitHub API calls
+ * @param {string|null} [options.sourceBlobUrl] - URL of a prebuilt app archive; when not
+ *   set, a tarball URL for the latest commit is requested from GitHub instead
+ * @param {number|string|null} [options.runId] - Workflow run ID used to build the log URL
+ * @throws {Error} If any deployment stage fails; the original error is re-thrown after
+ *   attempting to report an `error` deployment status
+ */
+export default async function deployToProduction({
+  octokit,
+  // These parameters will only be set by Actions
+  sourceBlobUrl = null,
+  runId = null,
+}) {
+  // Start a timer so we can report how long the deployment takes
+  const startTime = Date.now()
+  const [owner, repo, branch] = ['github', 'docs-internal', 'main']
+
+  let sha
+  try {
+    const {
+      data: { sha: latestSha },
+    } = await octokit.repos.getCommit({
+      owner,
+      repo,
+      ref: branch,
+    })
+    sha = latestSha
+
+    if (!sha) {
+      throw new Error('Latest commit SHA could not be found')
+    }
+  } catch (error) {
+    console.error(`Error: ${error}`)
+    console.log(`🛑 There was an error getting latest commit.`)
+    process.exit(1)
+  }
+
+  // Put together application configuration variables
+  const isPrebuilt = !!sourceBlobUrl
+  const { DOCUBOT_REPO_PAT } = process.env
+  const appConfigVars = {
+    // Track the git branch
+    GIT_BRANCH: branch,
+    // If prebuilt: prevent the Heroku Node.js buildpack from installing devDependencies
+    NPM_CONFIG_PRODUCTION: isPrebuilt.toString(),
+    // If prebuilt: prevent the Heroku Node.js buildpack from using `npm ci` as it would
+    // delete all of the vendored "node_modules/" directory.
+    USE_NPM_INSTALL: isPrebuilt.toString(),
+    ...(!isPrebuilt && DOCUBOT_REPO_PAT && { DOCUBOT_REPO_PAT }),
+  }
+
+  const workflowRunLog = runId ? `https://github.com/${owner}/${repo}/actions/runs/${runId}` : null
+  let deploymentId = null
+  let logUrl = workflowRunLog
+
+  const appName = 'help-docs-prod-gha'
+  const homepageUrl = `https://${appName}.herokuapp.com/`
+
+  try {
+    const title = `branch '${branch}' at commit '${sha}' in the 'production' environment as '${appName}'`
+
+    console.log(`About to deploy ${title}...`)
+
+    // Kick off a pending GitHub Deployment right away, so the PR author
+    // will have instant feedback that their work is being deployed.
+    const { data: deployment } = await octokit.repos.createDeployment({
+      owner,
+      repo,
+      description: `Deploying ${title}`,
+      ref: sha,
+
+      // In the GitHub API, there can only be one active deployment per environment.
+      environment: appName,
+
+      // The status contexts to verify against commit status checks. If you omit
+      // this parameter, GitHub verifies all unique contexts before creating a
+      // deployment. To bypass checking entirely, pass an empty array. Defaults
+      // to all unique contexts.
+      required_contexts: [],
+
+      // Do not try to merge the base branch into the feature branch
+      auto_merge: false,
+    })
+    console.log('GitHub Deployment created', deployment)
+
+    // Store this ID for later updating
+    deploymentId = deployment.id
+
+    // Set some output variables for workflow steps that run after this script
+    if (process.env.GITHUB_ACTIONS) {
+      setOutput('deploymentId', deploymentId)
+      setOutput('logUrl', logUrl)
+    }
+
+    await octokit.repos.createDeploymentStatus({
+      owner,
+      repo,
+      deployment_id: deploymentId,
+      state: 'in_progress',
+      description: 'Deploying the app...',
+      // The 'ant-man' preview is required for `state` values of 'inactive', as well as
+      // the use of the `log_url`, `environment_url`, and `auto_inactive` parameters.
+      // The 'flash' preview is required for `state` values of 'in_progress' and 'queued'.
+      mediaType: {
+        previews: ['ant-man', 'flash'],
+      },
+    })
+    console.log('🚀 Deployment status: in_progress - Preparing to deploy the app...')
+
+    // Time to talk to Heroku...
+    const heroku = new Heroku({ token: process.env.HEROKU_API_TOKEN })
+    let build = null
+
+    if (!sourceBlobUrl) {
+      try {
+        sourceBlobUrl = await getTarballUrl({
+          octokit,
+          owner,
+          repo,
+          sha,
+        })
+      } catch (error) {
+        throw new Error(`Failed to generate source blob URL. Error: ${error}`)
+      }
+    }
+
+    console.log('Updating Heroku app configuration variables...')
+
+    // Reconfigure environment variables
+    // https://devcenter.heroku.com/articles/platform-api-reference#config-vars-update
+    try {
+      await heroku.patch(`/apps/${appName}/config-vars`, {
+        body: appConfigVars,
+      })
+    } catch (error) {
+      throw new Error(`Failed to update Heroku app configuration variables. Error: ${error}`)
+    }
+
+    console.log('Reconfigured')
+    console.log('Building Heroku app...')
+
+    try {
+      build = await heroku.post(`/apps/${appName}/builds`, {
+        body: {
+          source_blob: {
+            url: sourceBlobUrl,
+          },
+        },
+      })
+    } catch (error) {
+      throw new Error(`Failed to create Heroku build. Error: ${error}`)
+    }
+
+    console.log('Heroku build created', build)
+
+    const buildStartTime = Date.now() // Close enough...
+    const buildId = build.id
+    logUrl = build.output_stream_url
+
+    console.log('🚀 Deployment status: in_progress - Building a new Heroku slug...')
+
+    // Poll until the Build's status changes from "pending" to "succeeded" or "failed".
+    let buildAcceptableErrorCount = 0
+    while (!build || build.status === 'pending' || !build.release || !build.release.id) {
+      await sleep(SLEEP_INTERVAL)
+      try {
+        build = await heroku.get(`/apps/${appName}/builds/${buildId}`)
+      } catch (error) {
+        // Allow for a few bad responses from the Heroku API
+        if (error.statusCode === 404 || error.statusCode === 429) {
+          buildAcceptableErrorCount += 1
+          if (buildAcceptableErrorCount <= ALLOWED_MISSING_RESPONSE_COUNT) {
+            continue
+          }
+        }
+        throw new Error(`Failed to get build status. Error: ${error}`)
+      }
+
+      if (build && build.status === 'failed') {
+        throw new Error(
+          `Failed to build after ${Math.round(
+            (Date.now() - buildStartTime) / 1000
+          )} seconds. See Heroku logs for more information:\n${logUrl}`
+        )
+      }
+
+      console.log(
+        `Heroku build status: ${(build || {}).status} (after ${Math.round(
+          (Date.now() - buildStartTime) / 1000
+        )} seconds)`
+      )
+    }
+
+    console.log(
+      `Finished Heroku build after ${Math.round((Date.now() - buildStartTime) / 1000)} seconds.`,
+      build
+    )
+
+    const releaseStartTime = Date.now() // Close enough...
+    const releaseId = build.release.id
+    let release = null
+
+    // Poll until the associated Release's status changes from "pending" to "succeeded" or "failed".
+    let releaseAcceptableErrorCount = 0
+    while (!release || release.status === 'pending') {
+      await sleep(SLEEP_INTERVAL)
+      try {
+        const result = await heroku.get(`/apps/${appName}/releases/${releaseId}`)
+
+        // Update the deployment status but only on the first retrieval
+        if (!release) {
+          logUrl = result.output_stream_url
+
+          console.log('Heroku Release created', result)
+
+          console.log('🚀 Deployment status: in_progress - Releasing the built Heroku slug...')
+        }
+
+        release = result
+      } catch (error) {
+        // Allow for a few bad responses from the Heroku API
+        if (error.statusCode === 404 || error.statusCode === 429) {
+          releaseAcceptableErrorCount += 1
+          if (releaseAcceptableErrorCount <= ALLOWED_MISSING_RESPONSE_COUNT) {
+            continue
+          }
+        }
+        throw new Error(`Failed to get release status. Error: ${error}`)
+      }
+
+      if (release && release.status === 'failed') {
+        throw new Error(
+          `Failed to release after ${Math.round(
+            (Date.now() - releaseStartTime) / 1000
+          )} seconds. See Heroku logs for more information:\n${logUrl}`
+        )
+      }
+
+      console.log(
+        `Release status: ${(release || {}).status} (after ${Math.round(
+          (Date.now() - releaseStartTime) / 1000
+        )} seconds)`
+      )
+    }
+
+    console.log(
+      `Finished Heroku release after ${Math.round(
+        (Date.now() - releaseStartTime) / 1000
+      )} seconds.`,
+      release
+    )
+
+    // Monitor dyno state for this release to ensure it reaches "up" rather than crashing.
+    // This will help us catch issues with faulty startup code and/or the package manifest.
+    const dynoBootStartTime = Date.now()
+    console.log('Checking Heroku dynos...')
+    logUrl = workflowRunLog
+
+    console.log('🚀 Deployment status: in_progress - Monitoring the Heroku dyno start-up...')
+
+    // Keep checking while there are still dynos in non-terminal states
+    let newDynos = []
+    let dynoAcceptableErrorCount = 0
+    while (newDynos.length === 0 || newDynos.some((dyno) => dyno.state === 'starting')) {
+      await sleep(SLEEP_INTERVAL)
+      try {
+        const dynoList = await heroku.get(`/apps/${appName}/dynos`)
+        const dynosForThisRelease = dynoList.filter((dyno) => dyno.release.id === releaseId)
+
+        // To track them afterward
+        newDynos = dynosForThisRelease
+
+        console.log(
+          `Dyno states: ${JSON.stringify(newDynos.map((dyno) => dyno.state))} (after ${Math.round(
+            (Date.now() - dynoBootStartTime) / 1000
+          )} seconds)`
+        )
+      } catch (error) {
+        // Allow for a few bad responses from the Heroku API
+        if (error.statusCode === 404 || error.statusCode === 429) {
+          dynoAcceptableErrorCount += 1
+          if (dynoAcceptableErrorCount <= ALLOWED_MISSING_RESPONSE_COUNT) {
+            continue
+          }
+        }
+        throw new Error(`Failed to find dynos for this release. Error: ${error}`)
+      }
+    }
+
+    const crashedDynos = newDynos.filter((dyno) => ['crashed', 'restarting'].includes(dyno.state))
+    const runningDynos = newDynos.filter((dyno) => dyno.state === 'up')
+
+    // If any dynos crashed on start-up, fail the deployment
+    if (crashedDynos.length > 0) {
+      const errorMessage = `At least ${crashedDynos.length} Heroku dyno(s) crashed on start-up!`
+
+      console.error(errorMessage)
+
+      // Attempt to dump some of the Heroku log here for debugging
+      try {
+        const logSession = await heroku.post(`/apps/${appName}/log-sessions`, {
+          body: {
+            dyno: crashedDynos[0].name,
+            lines: HEROKU_LOG_LINES_TO_SHOW,
+            tail: false,
+          },
+        })
+
+        logUrl = logSession.logplex_url
+
+        const logText = await got(logUrl).text()
+        console.error(
+          `Here are the last ${HEROKU_LOG_LINES_TO_SHOW} lines of the Heroku log:\n\n${logText}`
+        )
+      } catch (error) {
+        // Don't fail because of this error
+        console.error(`Failed to retrieve the Heroku logs for the crashed dynos. Error: ${error}`)
+      }
+
+      throw new Error(errorMessage)
+    }
+
+    console.log(
+      `At least ${runningDynos.length} Heroku dyno(s) are ready after ${Math.round(
+        (Date.now() - dynoBootStartTime) / 1000
+      )} seconds.`
+    )
+
+    //
+    // TODO:
+    // Should we consider adding an explicit 2-minute pause here to allow for
+    // Heroku Preboot to actually swap in the new dynos?
+    //
+
+    // Report success!
+    const successMessage = `Deployment succeeded after ${Math.round(
+      (Date.now() - startTime) / 1000
+    )} seconds.`
+    console.log(successMessage)
+
+    await octokit.repos.createDeploymentStatus({
+      owner,
+      repo,
+      deployment_id: deploymentId,
+      state: 'success',
+      description: successMessage,
+      ...(logUrl && { log_url: logUrl }),
+      environment_url: homepageUrl,
+      // The 'ant-man' preview is required for `state` values of 'inactive', as well as
+      // the use of the `log_url`, `environment_url`, and `auto_inactive` parameters.
+      // The 'flash' preview is required for `state` values of 'in_progress' and 'queued'.
+      mediaType: {
+        previews: ['ant-man', 'flash'],
+      },
+    })
+
+    console.log(`🚀 Deployment status: success - ${successMessage}`)
+    console.log(`Visit the newly deployed app at: ${homepageUrl}`)
+  } catch (error) {
+    // Report failure!
+    const failureMessage = `Deployment failed after ${Math.round(
+      (Date.now() - startTime) / 1000
+    )} seconds. See logs for more information.`
+    console.error(failureMessage)
+
+    try {
+      if (deploymentId) {
+        await octokit.repos.createDeploymentStatus({
+          owner,
+          repo,
+          deployment_id: deploymentId,
+          state: 'error',
+          description: failureMessage,
+          ...(logUrl && { log_url: logUrl }),
+          environment_url: homepageUrl,
+          // The 'ant-man' preview is required for `state` values of 'inactive', as well as
+          // the use of the `log_url`, `environment_url`, and `auto_inactive` parameters.
+          // The 'flash' preview is required for `state` values of 'in_progress' and 'queued'.
+          mediaType: {
+            previews: ['ant-man', 'flash'],
+          },
+        })
+
+        console.log(
+          `🚀 Deployment status: error - ${failureMessage}` + (logUrl ? ` Logs: ${logUrl}` : '')
+        )
+      }
+    } catch (error) {
+      console.error(`Failed to finalize GitHub Deployment Status as a failure. Error: ${error}`)
+    }
+
+    // Re-throw the error to bubble up
+    throw error
+  }
+}
+
+/**
+ * Get the URL of the source tarball for a commit without downloading the archive.
+ *
+ * @returns {Promise<string>} The `location` header value of the archive response
+ */
+async function getTarballUrl({ octokit, owner, repo, sha }) {
+  // Get a URL for the tarballed source code bundle
+  const {
+    headers: { location: tarballUrl },
+  } = await octokit.repos.downloadTarballArchive({
+    owner,
+    repo,
+    ref: sha,
+    // Override the underlying `node-fetch` module's `redirect` option
+    // configuration to prevent automatically following redirects.
+    request: {
+      redirect: 'manual',
+    },
+  })
+  return tarballUrl
+}
diff --git a/script/deployment/purge-edge-cache.js b/script/deployment/purge-edge-cache.js
new file mode 100644
index 0000000000..7024417882
--- /dev/null
+++ b/script/deployment/purge-edge-cache.js
@@ -0,0 +1,89 @@
+import sleep from 'await-sleep'
+import got from 'got'
+
+const ONE_SECOND = 1000
+const ONE_MINUTE = 60 * ONE_SECOND
+
+/**
+ * Soft-purge every object tagged with a surrogate key from a Fastly service.
+ *
+ * @param {object} options
+ * @param {string} options.apiToken - Fastly API token, sent as the `fastly-key` header
+ * @param {string} options.serviceId - ID of the Fastly service to purge
+ * @param {string} options.surrogateKey - Surrogate key whose cached objects are purged
+ * @returns {Promise<object>} The `got` response, with `body` parsed as JSON
+ */
+async function purgeFastlyBySurrogateKey({ apiToken, serviceId, surrogateKey }) {
+  const safeServiceId = encodeURIComponent(serviceId)
+  const safeSurrogateKey = encodeURIComponent(surrogateKey)
+
+  const headers = {
+    'fastly-key': apiToken,
+    accept: 'application/json',
+    'fastly-soft-purge': '1',
+  }
+  const requestPath = `https://api.fastly.com/service/${safeServiceId}/purge/${safeSurrogateKey}`
+  // Use `responseType: 'json'` to get a parsed response body. In got v10+, the `json`
+  // option is the request payload rather than a flag for parsing the response.
+  return got.post(requestPath, { headers, responseType: 'json' })
+}
+
+// This delay (includeDelayForPreboot) can potentially be removed in the
+// future if the deployment workflow is updated to include a delay to offset
+// Heroku Preboot before this script runs.
+/**
+ * Purge the Fastly edge cache twice, after waiting for the newly deployed app to be serving.
+ *
+ * Reads FASTLY_TOKEN, FASTLY_SERVICE_ID, and FASTLY_SURROGATE_KEY from the environment
+ * and skips the purge entirely when any of them is missing.
+ *
+ * @param {object} [options]
+ * @param {boolean} [options.includeDelayForPreboot=true] - Also wait for the Heroku Preboot swap
+ * @returns {Promise<void>}
+ */
+export default async function purgeEdgeCache({ includeDelayForPreboot = true } = {}) {
+  // If Heroku Preboot is enabled, then there is an additional delay of at
+  // least 2 minutes before the new dynos are swapped into active serving.
+  const delayForPrebootSwap = 2 * ONE_MINUTE + 30 * ONE_SECOND
+
+  // Give the app some extra time to wake up before the thundering herd of
+  // Fastly requests.
+  const delayBeforeFirstPurge = ONE_MINUTE
+
+  // Evidence has shown that it's necessary to purge twice to ensure all
+  // customers see fresh content.
+  const delayBeforeSecondPurge = 5 * ONE_SECOND
+
+  console.log('Fastly purgeEdgeCache initialized...')
+
+  const { FASTLY_TOKEN, FASTLY_SERVICE_ID, FASTLY_SURROGATE_KEY } = process.env
+  if (!FASTLY_TOKEN || !FASTLY_SERVICE_ID || !FASTLY_SURROGATE_KEY) {
+    console.log('Fastly env vars not detected; skipping purgeEdgeCache step')
+    return
+  }
+
+  const purgingParams = {
+    apiToken: FASTLY_TOKEN,
+    serviceId: FASTLY_SERVICE_ID,
+    surrogateKey: FASTLY_SURROGATE_KEY,
+  }
+
+  if (includeDelayForPreboot) {
+    console.log('Waiting for Heroku Preboot to swap dynos...')
+    await sleep(delayForPrebootSwap)
+  }
+
+  console.log('Waiting extra time to prevent a Thundering Herd problem...')
+  await sleep(delayBeforeFirstPurge)
+
+  console.log('Attempting first Fastly purge...')
+  const firstPurge = await purgeFastlyBySurrogateKey(purgingParams)
+  console.log('First Fastly purge result:', firstPurge.body || firstPurge)
+
+  console.log('Waiting to purge a second time...')
+  await sleep(delayBeforeSecondPurge)
+
+  console.log('Attempting second Fastly purge...')
+  const secondPurge = await purgeFastlyBySurrogateKey(purgingParams)
+  console.log('Second Fastly purge result:', secondPurge.body || secondPurge)
+}