Are you looking for something? Here is all of the GitHub Docs history in one single commit. Enjoy! 🎉
146 lines
4.7 KiB
JavaScript
146 lines
4.7 KiB
JavaScript
const assert = require('assert')
|
|
const { chain, chunk, difference, isArray, isString, inRange } = require('lodash')
|
|
const eventToPromise = require('event-to-promise')
|
|
const objectHash = require('object-hash')
|
|
const countArrayValues = require('count-array-values')
|
|
const isURL = require('is-url')
|
|
const rank = require('./rank')
|
|
|
|
class AlgoliaIndex {
|
|
// records must be truncated to avoid going over Algolia's 10K limit
|
|
static get maxRecordLength () { return 8000 }
|
|
static get maxContentLength () { return 5000 }
|
|
static get namePrefix () { return 'github-docs' }
|
|
|
|
constructor (name, records) {
|
|
this.name = name
|
|
this.records = records
|
|
.map(record => {
|
|
record.customRanking = rank(record)
|
|
return record
|
|
})
|
|
this.validate()
|
|
return this
|
|
}
|
|
|
|
validate () {
|
|
assert(isString(this.name) && this.name.length, '`name` is required')
|
|
assert(isArray(this.records) && this.records.length, '`records` must be a non-empty array')
|
|
|
|
// each ID is unique
|
|
const objectIDs = this.records.map(record => record.objectID)
|
|
const dupes = countArrayValues(objectIDs)
|
|
.filter(({ value, count }) => count > 1)
|
|
.map(({ value }) => value)
|
|
assert(!dupes.length, `every objectID must be unique. dupes: ${dupes.join('; ')}`)
|
|
|
|
this.records.forEach(record => {
|
|
assert(
|
|
isString(record.objectID) && record.objectID.length,
|
|
`objectID must be a string. received: ${record.objectID}, ${JSON.stringify(record)}`
|
|
)
|
|
|
|
assert(
|
|
isString(record.title) && record.title.length,
|
|
`title must be a string. received: ${record.title}, ${JSON.stringify(record)}`
|
|
)
|
|
|
|
assert(
|
|
isURL(record.url),
|
|
`url must be a fully qualified URL. received: ${record.url}, ${JSON.stringify(record)}`
|
|
)
|
|
|
|
assert(
|
|
inRange(record.customRanking, 0, 4),
|
|
`customRanking must be an in-range number. received: ${record.customRanking}, (record: ${record.url})`
|
|
)
|
|
|
|
const recordLength = JSON.stringify(record).length
|
|
assert(
|
|
recordLength <= AlgoliaIndex.maxRecordLength,
|
|
`record ${record.url} is too long! ${recordLength} (max: ${AlgoliaIndex.maxRecordLength})`
|
|
)
|
|
})
|
|
|
|
return true
|
|
}
|
|
|
|
// This method consumes Algolia's `browseAll` event emitter,
|
|
// aggregating results into an array of all the records
|
|
// https://www.algolia.com/doc/api-reference/api-methods/browse/
|
|
async fetchExistingRecords () {
|
|
const client = require('./client')
|
|
|
|
// return an empty array if the index does not exist yet
|
|
const { items: indices } = await client.listIndexes()
|
|
|
|
if (!indices.find(index => index.name === this.name)) {
|
|
console.log(`index '${this.name}' does not exist!`)
|
|
return []
|
|
}
|
|
|
|
const index = client.initIndex(this.name)
|
|
const browser = index.browseAll()
|
|
let records = []
|
|
|
|
browser.on('result', (content) => {
|
|
records = records.concat(content.hits)
|
|
})
|
|
|
|
browser.on('error', (err) => {
|
|
throw err
|
|
})
|
|
|
|
await eventToPromise(browser, 'end')
|
|
|
|
return records
|
|
}
|
|
|
|
async syncWithRemote () {
|
|
const client = require('./client')
|
|
|
|
console.log('\n\nsyncing %s with remote', this.name)
|
|
this.validate()
|
|
|
|
const existingRecords = await this.fetchExistingRecords()
|
|
|
|
const existingIds = chain(existingRecords).map('objectID').value()
|
|
const currentIds = chain(this.records).map('objectID').value()
|
|
const deprecatedIds = difference(existingIds, currentIds)
|
|
const newIds = difference(currentIds, existingIds)
|
|
|
|
// Create a hash of every existing record, to compare to the new records
|
|
// The `object-hash` module is indifferent to object key order by default. :+1:
|
|
const existingHashes = existingRecords.map(record => objectHash(record))
|
|
|
|
// If a hash is found, that means the existing Algolia record contains the
|
|
// same data as new record, and the record doesn't need to be updated.
|
|
const recordsToUpdate = this.records.filter(record => {
|
|
return !existingHashes.includes(objectHash(record))
|
|
})
|
|
|
|
console.log('deprecated objectIDs:', deprecatedIds)
|
|
console.log('new objectIDs:', newIds)
|
|
console.log('total current records:', this.records.length)
|
|
console.log('records to update:', recordsToUpdate.length)
|
|
|
|
const index = client.initIndex(this.name)
|
|
|
|
if (deprecatedIds.length) {
|
|
console.log('deleting %d deprecated record(s)', deprecatedIds.length)
|
|
await index.deleteObjects(deprecatedIds)
|
|
}
|
|
|
|
if (recordsToUpdate.length) {
|
|
console.log('uploading %d new or modified record(s)', recordsToUpdate.length)
|
|
const chunks = chunk(recordsToUpdate, 1000)
|
|
|
|
for (const batch of chunks) {
|
|
await index.addObjects(batch)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Expose the AlgoliaIndex class as this module's export.
module.exports = AlgoliaIndex
|