diff --git a/content/README.md b/content/README.md index 79aa0fca8b..45b571c323 100644 --- a/content/README.md +++ b/content/README.md @@ -228,9 +228,9 @@ includeGuides: - Optional. ### `topics` -- Purpose: Indicate the topics covered by the article. -- Type: `String` -- Optional. +- Purpose: Indicate the topics covered by the article. The topics are used to filter guides on some landing pages. For example, the guides at the bottom of [this page](https://docs.github.com/en/actions/guides) can be filtered by topics and the topics are listed under the guide intro. Topics are also added to all search records that get created for each page. The search records contain a `topics` property that is used to filter search results by topics. For more information, see the [Search](/contributing/search.md) contributing guide. Refer to the content models for more details around adding topics. A full list of existing topics is located in the [allowed topics file](/data/allowed-topics.js). If topics in article frontmatter and the allow-topics list become out of sync, the [topics CI test](/tests/unit/search/topics.js) will fail. +- Type: Array of `String`s +- Optional: Topics are preferred for each article, but, there may be cases where existing articles don't yet have topics or a adding a topic to a new article may not add value. ### `contributor` - Purpose: Indicate an article is contributed and maintained by a third-party organization, typically a GitHub Technology Partner. diff --git a/contributing/search.md b/contributing/search.md index 093a419341..2c2eabf09a 100644 --- a/contributing/search.md +++ b/contributing/search.md @@ -15,7 +15,10 @@ To see all existing search-related issues and pull requests, visit [github.com/g ## How to search The site search is part of every version of docs.github.com. On any page, you can use the search box to search the documents we've indexed. -You can also query our search endpoint directly at: https://docs.github.com/search?language=en&version=dotcom&query=jekyll +You can also query our search endpoint directly at: https://docs.github.com/search?language=en&version=dotcom&query=jekyll+topics:actions + +Using the attribute `topics` in your query will only return results that have the matching topic value. You can find a full list of topics in [the allowed topics file](/data/allowed-topics.js). The `topics` attribute is configured as a [`filter only` facet in Algolia](https://www.algolia.com/doc/guides/managing-results/refine-results/filtering/). + This endpoint responds in JSON format, and fronts Algolia and Lunr. We recommend using this endpoint over directly integrating with Algolia or Lunr, as the endpoint will be more stable. ## Production deploys diff --git a/data/allowed-topics.js b/data/allowed-topics.js new file mode 100644 index 0000000000..a309eccd03 --- /dev/null +++ b/data/allowed-topics.js @@ -0,0 +1,84 @@ +// This is an AllowList of topics that are approved for use in `topics` +// frontmatter property. If a new topic is added to a Markdown file it must +// also be added to this file. + +// The purpose of this list is to ensure we prevent typos and put a process in +// place to keep a curated list of topics. This list also serves as a list of +// available topics filters when using the search endpoint +// (see /contributing/search#how-to-search) +// If you'd like to add a new topic, consult the topic guidelines in the +// content model, add the entry to this list, and ensure you loop in the +// content and/or content strategy team for review. + +module.exports = [ + '2fa', + 'Action development', + 'Amazon ECS', + 'Ant', + 'Azure App Service', + 'Azure Pipelines', + 'CD', + 'CI', + 'CircleCI', + 'Containers', + 'Docker', + 'Fundamentals', + 'GitLab', + 'Google Kubernetes Engine', + 'Gradle', + 'Java', + 'JavaScript', + 'Jenkins', + 'Maven', + 'Migration', + 'Node', + 'Packaging', + 'Powershell', + 'Project management', + 'Publishing', + 'Python', + 'Ruby', + 'Security', + 'Travis CI', + 'Workflows', + 'access management', + 'accounts', + 'api', + 'billing', + 'cli', + 'codespaces', + 'community', + 'desktop', + 'device verification', + 'early access', + 'enterprise', + 'events', + 'github', + 'github apps', + 'github search', + 'identity', + 'issues', + 'jobs', + 'legal', + 'marketplace', + 'mobile', + 'notifications', + 'oauth apps', + 'open source', + 'organizations', + 'pages', + 'permissions', + 'policy', + 'profile', + 'profiles', + 'projects', + 'pull requests', + 'repositories', + 'security', + 'sponsors', + 'ssh', + 'sso', + 'teams', + 'usernames', + 'webhooks' +] diff --git a/includes/head.html b/includes/head.html index 5d931a9048..0cd6b63fad 100644 --- a/includes/head.html +++ b/includes/head.html @@ -11,6 +11,9 @@ {% if page.intro %} {% endif %} + {% if page.topics %} + + {% endif %} {% if page.hidden %} {% endif %} diff --git a/javascripts/search.js b/javascripts/search.js index 760bce874b..edc7def535 100644 --- a/javascripts/search.js +++ b/javascripts/search.js @@ -261,8 +261,8 @@ function tmplSearchResult ({ url, breadcrumbs, heading, title, content }) { { href: url, class: 'no-underline' }, div( { class: 'search-result-breadcrumbs d-block text-gray-dark opacity-60 text-small pb-1' }, - // Remove redundant title from the end of breadcrumbs - markify((breadcrumbs || '').replace(` / ${title}`, '')) + // Breadcrumbs in search records don't include the page title + markify(breadcrumbs || '') ), div( { class: 'search-result-title d-block h4-mktg text-gray-dark' }, diff --git a/lib/search/algolia-search.js b/lib/search/algolia-search.js index e14b4e4524..a5d0a598ca 100644 --- a/lib/search/algolia-search.js +++ b/lib/search/algolia-search.js @@ -24,6 +24,7 @@ module.exports = async function loadAlgoliaResults ({ version, language, query, breadcrumbs: get(hit, '_highlightResult.breadcrumbs.value'), heading: get(hit, '_highlightResult.heading.value'), title: get(hit, '_highlightResult.title.value'), - content: get(hit, '_highlightResult.content.value') + content: get(hit, '_highlightResult.content.value'), + topics: hit.topics })) } diff --git a/lib/search/lunr-search-index.js b/lib/search/lunr-search-index.js index 05eb88ddc3..bde23b7f7e 100644 --- a/lib/search/lunr-search-index.js +++ b/lib/search/lunr-search-index.js @@ -46,6 +46,7 @@ module.exports = class LunrIndex { this.field('heading') this.field('title') this.field('content') + this.field('topics') this.field('customRanking') this.metadataWhitelist = ['position'] diff --git a/lib/search/lunr-search.js b/lib/search/lunr-search.js index 1a3e65f319..1a0d5db2b5 100644 --- a/lib/search/lunr-search.js +++ b/lib/search/lunr-search.js @@ -25,7 +25,9 @@ module.exports = async function loadLunrResults ({ version, language, query, lim breadcrumbs: field(result, record, 'breadcrumbs'), heading: field(result, record, 'heading'), title: field(result, record, 'title'), - content: field(result, record, 'content') + content: field(result, record, 'content'), + // don't highlight the topics array + topics: record.topics } }) return results @@ -48,6 +50,7 @@ async function loadLunrRecords (indexName) { .then(JSON.parse) } +// Highlight a match within an attribute field function field (result, record, name) { const text = record[name] if (!text) return text diff --git a/lib/search/parse-page-sections-into-records.js b/lib/search/parse-page-sections-into-records.js index 79ac651f83..3c253ae617 100644 --- a/lib/search/parse-page-sections-into-records.js +++ b/lib/search/parse-page-sections-into-records.js @@ -12,7 +12,7 @@ const { maxContentLength } = require('./config') module.exports = function parsePageSectionsIntoRecords (href, $) { const title = $('h1').text().trim() - const breadcrumbs = $('nav.breadcrumbs a') + const breadcrumbsArray = $('nav.breadcrumbs a') .map((i, el) => { return $(el) .text() @@ -21,7 +21,18 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { .replace(/\s+/g, ' ') }) .get() - .join(' / ') + .slice(0, -1) + + const breadcrumbs = breadcrumbsArray.join(' / ') || '' + const metaKeywords = $('meta[name="keywords"]').attr('content') + const topics = metaKeywords ? metaKeywords.split(',') : [] + + const productName = breadcrumbsArray[0] || '' + topics.push(productName) + // Remove "github" to make filter queries shorter + if (productName.includes('GitHub ')) { + topics.push(productName.replace('GitHub ', '')) + } let records @@ -54,7 +65,8 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { breadcrumbs, heading, title, - content + content, + topics } }) .get() @@ -74,7 +86,8 @@ module.exports = function parsePageSectionsIntoRecords (href, $) { url, breadcrumbs, title, - content + content, + topics }] } diff --git a/script/content-migrations/add-tags-to-articles.js b/script/content-migrations/add-tags-to-articles.js index 9a6f43138c..e98e7fe172 100755 --- a/script/content-migrations/add-tags-to-articles.js +++ b/script/content-migrations/add-tags-to-articles.js @@ -48,6 +48,7 @@ function updateFrontmatter (filePath, newTopics) { } else if (Array.isArray(data.topics)) { topics = topics.concat(data.topics) } + newTopics.forEach(topic => { topics.push(topic) }) diff --git a/tests/unit/algolia/fixtures/page-with-sections.html b/tests/unit/search/fixtures/page-with-sections.html similarity index 75% rename from tests/unit/algolia/fixtures/page-with-sections.html rename to tests/unit/search/fixtures/page-with-sections.html index f61318d4c4..f6da35c328 100644 --- a/tests/unit/algolia/fixtures/page-with-sections.html +++ b/tests/unit/search/fixtures/page-with-sections.html @@ -1,8 +1,11 @@ + + +

I am the page title

diff --git a/tests/unit/algolia/fixtures/page-without-sections.html b/tests/unit/search/fixtures/page-without-sections.html similarity index 56% rename from tests/unit/algolia/fixtures/page-without-sections.html rename to tests/unit/search/fixtures/page-without-sections.html index 2c137527e6..4046e0a66c 100644 --- a/tests/unit/algolia/fixtures/page-without-sections.html +++ b/tests/unit/search/fixtures/page-without-sections.html @@ -1,10 +1,13 @@ + + +

I am outside the article and should not be included

A page without sections

diff --git a/tests/unit/algolia/parse-page-sections-into-records.js b/tests/unit/search/parse-page-sections-into-records.js similarity index 79% rename from tests/unit/algolia/parse-page-sections-into-records.js rename to tests/unit/search/parse-page-sections-into-records.js index e328f846f0..982a72a086 100644 --- a/tests/unit/algolia/parse-page-sections-into-records.js +++ b/tests/unit/search/parse-page-sections-into-records.js @@ -20,19 +20,21 @@ describe('search parsePageSectionsIntoRecords module', () => { objectID: '/example/href#first', url: 'https://docs.github.com/example/href#first', slug: 'first', - breadcrumbs: 'a / b / c', + breadcrumbs: 'GitHub Actions / actions learning path', heading: 'First heading', title: 'I am the page title', - content: "Here's a paragraph. And another." + content: "Here's a paragraph. And another.", + topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions'] }, { objectID: '/example/href#second', url: 'https://docs.github.com/example/href#second', slug: 'second', - breadcrumbs: 'a / b / c', + breadcrumbs: 'GitHub Actions / actions learning path', heading: 'Second heading', title: 'I am the page title', - content: "Here's a paragraph in the second section. And another." + content: "Here's a paragraph in the second section. And another.", + topics: ['topic1', 'topic2', 'GitHub Actions', 'Actions'] } ] @@ -50,9 +52,10 @@ describe('search parsePageSectionsIntoRecords module', () => { { objectID: '/example/href', url: 'https://docs.github.com/example/href', - breadcrumbs: 'x / y / z', + breadcrumbs: 'Education / map topic', title: 'A page without sections', - content: 'First paragraph. Second paragraph.' + content: 'First paragraph. Second paragraph.', + topics: ['key1', 'key2', 'key3', 'Education'] } ] expect(records).toEqual(expected) diff --git a/tests/unit/algolia/rank.js b/tests/unit/search/rank.js similarity index 100% rename from tests/unit/algolia/rank.js rename to tests/unit/search/rank.js diff --git a/tests/unit/search/topics.js b/tests/unit/search/topics.js new file mode 100644 index 0000000000..67e31554bb --- /dev/null +++ b/tests/unit/search/topics.js @@ -0,0 +1,32 @@ +const path = require('path') +const fs = require('fs') +const readFrontmatter = require('../../../lib/read-frontmatter') +const walk = require('walk-sync') +const { difference } = require('lodash') +const allowedTopics = require('../../../data/allowed-topics') + +const contentDir = path.join(process.cwd(), 'content') +const topics = walk(contentDir, { includeBasePath: true }) + .filter(filename => filename.endsWith('.md') && !filename.includes('README')) + .map(filename => { + const fileContent = fs.readFileSync(filename, 'utf8') + const { data } = readFrontmatter(fileContent) + return data.topics || [] + }) + .flat() + +const allUsedTopics = [...new Set(topics)].sort() + +describe('Check for allowed frontmatter topics', () => { + test('all used topics are allowed in /data/allowed-topics.js', () => { + expect(allUsedTopics.length).toBeGreaterThan(0) + const unusedTopics = difference(allUsedTopics, allowedTopics) + expect(unusedTopics).toEqual([]) + }) + + test('all allowed topics are used by at least one content file', () => { + expect(allowedTopics.length).toBeGreaterThan(0) + const disallowedTopics = difference(allowedTopics, allUsedTopics) + expect(disallowedTopics).toEqual([]) + }) +})