#!/usr/bin/env node

const fs = require('fs')
const path = require('path')
const { execSync } = require('child_process')
const server = require('../server')
const port = '4001'
const host = `http://localhost:${port}`
const scrape = require('website-scraper')
const program = require('commander')
const rimraf = require('rimraf').sync
const mkdirp = require('mkdirp').sync
const version = require('../lib/enterprise-server-releases').oldestSupported
const archivalRepoName = 'help-docs-archived-enterprise-versions'
const archivalRepoUrl = `https://github.com/github/${archivalRepoName}`
const loadRedirects = require('../lib/redirects/precompile')

// [start-readme]
//
// Run this script during the Enterprise deprecation process to download
// static copies of all pages for the oldest supported Enterprise version.
// See the Enterprise deprecation issue template for instructions.
//
// [end-readme]

program
  .description('Scrape HTML of the oldest supported Enterprise version and add it to the archival repository.')
  // <PATH> declares a required option-argument (commander syntax); without it
  // the option is a boolean flag and program.pathToArchivalRepo is not a path.
  .option('-p, --path-to-archival-repo <PATH>', `path to a local checkout of ${archivalRepoUrl}`)
  .option('-d, --dry-run', 'only scrape the first 10 pages for testing purposes')
  .parse(process.argv)

const pathToArchivalRepo = program.pathToArchivalRepo
const dryRun = program.dryRun

main()

/**
 * website-scraper plugin that rewrites relative asset references in each
 * saved HTML page (e.g. `../../javascripts/index.js`) to absolute,
 * version-prefixed paths (e.g. `/enterprise/2.17/javascripts/index.js`),
 * then re-writes the file on disk.
 */
class RewriteAssetPathsPlugin {
  constructor (version, tempDirectory) {
    this.version = version
    this.tempDirectory = tempDirectory
  }

  apply (registerAction) {
    registerAction('onResourceSaved', async ({ resource }) => {
      // Show some activity
      process.stdout.write('.')

      // Only operate on HTML files
      if (!resource.isHtml()) return

      // Get the text contents of the resource
      const text = resource.getText()

      // Rewrite asset paths. Example:
      // ../../javascripts/index.js -> /enterprise/2.17/javascripts/index.js
      // The named groups feed the (match, attribute, basepath) callback params.
      const newBody = text.replace(
        /(?<attribute>src|href)="(?:\.\.\/)*(?<basepath>dist|javascripts|stylesheets|assets|node_modules)/g,
        (match, attribute, basepath) => {
          const replaced = path.join('/enterprise', this.version, basepath)
          // Reopen the attribute; the rest of the original value follows the match.
          const returnValue = `${attribute}="${replaced}`
          return returnValue
        }
      )

      const filePath = path.join(this.tempDirectory, resource.getFilename())

      await fs
        .promises
        .writeFile(filePath, newBody)
    })
  }
}

/**
 * Entry point: builds production assets, scrapes every page of the oldest
 * supported Enterprise version from a local server into a temp directory,
 * moves the result into the archival repo checkout, and writes redirect pages.
 */
async function main () {
  if (!pathToArchivalRepo) {
    console.log(`Please specify a path to a local checkout of ${archivalRepoUrl}`)
    console.log(`Example: script/archive-enterprise-version.js ../${archivalRepoName}`)
    process.exit()
  }

  if (dryRun) {
    console.log('This is a dry run! Creating HTML for redirects and scraping the first 10 pages only.\n')
  }

  // Build the production assets, to simulate a production deployment
  console.log('Running `npm run build` for production assets')
  execSync('npm run build', { stdio: 'inherit' })
  console.log('Finish building production assets')

  const fullPathToArchivalRepo = path.join(process.cwd(), pathToArchivalRepo)

  if (!fs.existsSync(fullPathToArchivalRepo)) {
    console.log(`archival repo path does not exist: ${fullPathToArchivalRepo}`)
    process.exit()
  }

  console.log(`Enterprise version to archive: ${version}`)

  const pages = await (require('../lib/pages')())
  const permalinksPerVersion = Object.keys(pages)
    .filter(key => key.includes(`/enterprise/${version}`))

  const urls = dryRun
    ? permalinksPerVersion.slice(0, 10).map(href => `${host}${href}`)
    : permalinksPerVersion.map(href => `${host}${href}`)

  console.log(`found ${urls.length} pages for version ${version}`)

  if (dryRun) {
    console.log(`\nscraping html for these pages only:\n${urls.join('\n')}\n`)
  }

  const finalDirectory = path.join(fullPathToArchivalRepo, version)
  const tempDirectory = path.join(__dirname, '../website-scraper-temp')

  // remove temp directory
  rimraf(tempDirectory)

  // remove and recreate empty target directory
  rimraf(finalDirectory)
  fs.mkdirSync(finalDirectory, { recursive: true })

  const scraperOptions = {
    urls,
    urlFilter: (url) => {
      // Do not download assets from other hosts like S3 or octodex.github.com
      // (this will keep them as remote references in the downloaded pages)
      return url.startsWith(`http://localhost:${port}/`)
    },
    directory: tempDirectory,
    filenameGenerator: 'bySiteStructure',
    requestConcurrency: 6,
    plugins: [new RewriteAssetPathsPlugin(version, tempDirectory)]
  }

  server.listen(port, async () => {
    console.log(`started server on ${host}`)

    await scrape(scraperOptions).catch(err => {
      console.error('scraping error')
      console.error(err)
    })

    // bySiteStructure nests everything under localhost_<port>; hoist it
    // into the final destination inside the archival repo.
    fs.renameSync(
      path.join(tempDirectory, `/localhost_${port}`),
      path.join(finalDirectory)
    )

    rimraf(tempDirectory)
    console.log(`\n\ndone scraping! added files to ${path.relative(process.cwd(), finalDirectory)}\n`)

    // create redirect html files to preserve frontmatter redirects
    await createRedirectPages(permalinksPerVersion, pages, finalDirectory)

    console.log(`next step: deprecate ${version} in lib/enterprise-server-releases.js`)
    process.exit()
  })
}

/**
 * Write a static redirect page (index.html) for every frontmatter redirect
 * that applies to the archived version.
 *
 * @param {string[]} permalinks - permalinks of pages in the archived version
 * @param {Object} pages - map of permalink -> page object
 * @param {string} finalDirectory - root of the archived output
 */
async function createRedirectPages (permalinks, pages, finalDirectory) {
  const pagesPerVersion = permalinks.map(permalink => pages[permalink])
  const redirects = await loadRedirects(pagesPerVersion)

  Object.entries(redirects).forEach(([oldPath, newPath]) => {
    // replace any liquid variables with the version number
    oldPath = oldPath.replace('{{ page.version }}', version)

    // ignore any old paths that are not in this version
    if (!oldPath.includes(`/enterprise/${version}`)) return

    const fullPath = path.join(finalDirectory, oldPath)
    const filename = `${fullPath}/index.html`
    const html = getRedirectHtml(newPath)

    mkdirp(fullPath)
    fs.writeFileSync(filename, html)
  })

  console.log('done creating redirect files!\n')
}

// prior art: https://github.com/github/help-docs-archived-enterprise-versions/blob/master/2.12/user/leave-a-repo/index.html
// redirect html files already exist in <=2.12 because these versions were deprecated on the old static site
function getRedirectHtml (newPath) {
  return `<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <title>Redirecting...</title>
    <link rel="canonical" href="${newPath}">
    <meta http-equiv="refresh" content="0; url=${newPath}">
  </head>
  <body>
    <h1>Redirecting...</h1>
    <a href="${newPath}">Click here if you are not redirected.</a>
  </body>
</html>`
}