diff --git a/data/reusables/projects/select-a-cell.md b/data/reusables/projects/select-a-cell.md index 5591d9697d..e59144425e 100644 --- a/data/reusables/projects/select-a-cell.md +++ b/data/reusables/projects/select-a-cell.md @@ -1,4 +1,4 @@ - Hold Command (Mac) or Ctrl (Windows/Linux) and select each cell. -- Select an cell then press Shift+ or Shift+ to select additional cell above or below the selected item. -- Select an cell then press Shift and select another item to select all items between the two items. +- Select a cell then press Shift+ or Shift+ to select additional cell above or below the selected item. +- Select a cell then press Shift and select another item to select all items between the two items. - Holding your mouse button down, move the cursor over the cells you want to select. \ No newline at end of file diff --git a/lib/page-data.js b/lib/page-data.js index 8fbdc08087..f41ee78372 100644 --- a/lib/page-data.js +++ b/lib/page-data.js @@ -149,7 +149,7 @@ async function translateTree(dir, langObj, enTree) { ) // The "content" isn't a frontmatter key - translatedData.markdown = correctTranslatedContentStrings(content) + translatedData.markdown = correctTranslatedContentStrings(content, enPage.markdown) item.page = new Page( Object.assign( @@ -190,7 +190,7 @@ async function translateTree(dir, langObj, enTree) { * It looks for easy "low hanging fruit" that we can correct for. * */ -export function correctTranslatedContentStrings(content) { +export function correctTranslatedContentStrings(content, englishContent, debug = false) { // A lot of translations have corruptions around the AUTOTITLE links. // We've requested that these are corrected back but as a temporary // solution we'll manually recover now. @@ -202,18 +202,72 @@ export function correctTranslatedContentStrings(content) { content = content.replaceAll('["AUTOTITLE]', '"[AUTOTITLE]') content = content.replaceAll('[AUTOTITLE"을 참조하세요.](', '[AUTOTITLE](') - // The page content/code-security/secret-scanning/secret-scanning-patterns.md - // uses some intricate tables in Markdown where exact linebreaks can - // cause the page to render incorrectly. Instead of becoming a ``, - // it becomes a massive `

` tag. - // Ideally, we should have a better solution that doesn't require such - // "sensitive" Markdown but for now, this change is important so the - // Markdown-to-HTML rendering doesn't become totally broken. - // See internal issue #2984 - content = content.replaceAll( - '{%- for entry in secretScanningData %} |', - '{%- for entry in secretScanningData %}\n|' - ) + // A lot of Liquid tags lose their linebreak after the `}` which can + // result in formatting problems, especially around Markdown tables. + // This code here, compares each Liquid statement, in the translation, + // and tests if it appears like that but with a newline in the English. + // English example: + // + // {%- ifversion ghes %} + // | Thing | ✔️ | + // {%- endif %} + // + // Translation example: + // + // {%- ifversion ghes %} | Thing | ✔️ | {%- endif %} + // + // There exists the risk that different Liquid statements gets compared + // different Liquid statements in the English, but the risk is worth + // taking because even if this accidentally introduces a newline, it's + // unlikely to cause a problem. At worst that a sentence displays on its + // own paragraph. + content = content.replace(/\{%(.+?)%\} /g, (match) => { + if (match.lastIndexOf('{%') > 0) { + // For example: + // + // `{% bla bla %}, and {% foo bar %} ` + // + // Our regex is not greedy, but technically, if you look closely + // you'll see this is the first match that starts with `{%` and + // ends with `%} `. Let's skip these. + return match + } + + const withLinebreak = match.slice(0, -1) + '\n' + if (englishContent.includes(withLinebreak) && !englishContent.includes(match)) { + return withLinebreak + } + return match + }) + // The above corrections deepend on looking for `{% foo %} ` and replacing + // it with `{% foo %}\n`. ...if `{% foo %}\n` was in the English + // content and `{% foo %} ` was *not*. + // However we see a lot of cases of this: + // + // ... {% endif %} | First Column ... + // + // Which needs to become this: + // + // ... {% endif %} + // | First Column ... + // + // And since `{% endif %}` is such a common Liquid tag we can't reply + // on lookig for it with `{% endif %}\n` in the English content. + content = content.replace(/\{% endif %\} \| /g, (match) => { + const potentiallyBetter = '{% endif %}\n| ' + if (englishContent.includes(potentiallyBetter)) { + return potentiallyBetter + } + return match + }) + + // All too often we see translations that look like this: + // + // | Qualifizierer | Beschreibung | | -------- | -------- | {% ifversion ghec or ghes > 3.8 %} | `advanced-security:enabled` | Zeigt Repositorys an, für die {% data variables.product.prodname_GH_advanced_security %} aktiviert wurde | {% endif %} | `code-scanning-pull-request-alerts:enabled`| Zeigt Repositorys an, für die die {% data variables.product.prodname_code_scanning %} zur Ausführung bei Pull Requests konfiguriert wurde | | `dependabot-security-updates:enabled` | Zeigt Repositorys an, für die {% data variables.product.prodname_dependabot %}-Sicherheitsupdates aktiviert wurden | | `secret-scanning-push-protection:enabled` | Zeigt Repositorys an, für die der Pushschutz für die {% data variables.product.prodname_secret_scanning %} aktiviert wurde | {% endif %} + // + // Yes, that's one very long line. Notice how all the necessary linebreaks + // are suddenly gone. + content = content.replaceAll(' | | ', ' |\n| ') return content } diff --git a/middleware/contextualizers/glossaries.js b/middleware/contextualizers/glossaries.js index b643362239..b6ba9434aa 100644 --- a/middleware/contextualizers/glossaries.js +++ b/middleware/contextualizers/glossaries.js @@ -30,7 +30,17 @@ export default async function glossaries(req, res, next) { glossariesRaw.map(async (glossary) => { let { description } = glossary if (req.context.currentLanguage !== 'en') { - description = correctTranslatedContentStrings(description) + description = correctTranslatedContentStrings( + description, + // The function needs the English equialent of the translated + // Markdown. It's to make possible corrections to the + // translation's Liquid which might have lost important + // linebreaks. + // But because the terms themselves are often translated, + // in this mapping we often don't have an English equivalent. + // So that's why we fall back on the empty string. + enGlossaryMap.get(glossary.term) || '' + ) } description = await executeWithFallback( req.context, diff --git a/tests/fixtures/content/get-started/foo/index.md b/tests/fixtures/content/get-started/foo/index.md index d62fbc882a..4262bd4e7a 100644 --- a/tests/fixtures/content/get-started/foo/index.md +++ b/tests/fixtures/content/get-started/foo/index.md @@ -16,4 +16,5 @@ children: - /for-playwright - /html-short-title - /page-with-callout + - /table-with-ifversions --- diff --git a/tests/fixtures/content/get-started/foo/table-with-ifversions.md b/tests/fixtures/content/get-started/foo/table-with-ifversions.md new file mode 100644 index 0000000000..87fcf42716 --- /dev/null +++ b/tests/fixtures/content/get-started/foo/table-with-ifversions.md @@ -0,0 +1,23 @@ +--- +title: Table with ifversions +intro: Demonstrates a page that uses Liquid if statements to hide certain rows in a Markdown table +versions: + fpt: '*' + ghes: '*' + ghec: '*' +--- + +## Intro + +Here's an inline mention of {% data variables.product.product_name %} in Liquid. + +## Table + +| Key | Value | +|---|---| +{% ifversion ghes -%} +| GHES | Present | +{%- endif -%} +{%- ifversion not ghes -%} +| GHES | Not | +{% endif %} diff --git a/tests/fixtures/translations/ja-jp/content/get-started/foo/table-with-ifversions.md b/tests/fixtures/translations/ja-jp/content/get-started/foo/table-with-ifversions.md new file mode 100644 index 0000000000..2704484751 --- /dev/null +++ b/tests/fixtures/translations/ja-jp/content/get-started/foo/table-with-ifversions.md @@ -0,0 +1,28 @@ +--- +title: Table with ifversions +intro: Liquid if ステートメントを使用して Markdown テーブル内の特定の行を非表示にするページを示します。 +versions: + fpt: '*' + ghes: '*' + ghec: '*' +--- + +## Intro + +Here's an inline mention of {% data variables.product.product_name %} in Liquid. + +## Table + +What's important here is that in our automated translation system, +sometimes the trailing linebreak after the `%}` or `-%}` gets lost. +So this Markdown below is hand-crafted to look exactly like what happens +sometimes in our translations. + +Having this fixture, albeit a bit corrupt and wrong, allows us to be +certain that our string manipulation code can do magic and fix this +so the Liquid statements have matching linebreaks compared to the English +version. + +| Key | Value | +|---|---| +{% ifversion ghes -%} | GHES | Present | {%- endif -%} {%- ifversion not ghes -%} | GHES | Not | {% endif %} diff --git a/tests/rendering-fixtures/translations.js b/tests/rendering-fixtures/translations.js index 81cad485b5..f12f4ce3fd 100644 --- a/tests/rendering-fixtures/translations.js +++ b/tests/rendering-fixtures/translations.js @@ -59,4 +59,33 @@ describe('translations', () => { // There are 4 links on the `autotitling.md` content. expect.assertions(4) }) + + test('correction of linebreaks in translations', async () => { + // free-pro-team + { + const $ = await getDOM('/ja/get-started/foo/table-with-ifversions') + + const paragraph = $('#article-contents p').text() + expect(paragraph).toMatch('mention of GitHub in Liquid') + + const tds = $('#article-contents td') + .map((i, element) => $(element).text()) + .get() + expect(tds.length).toBe(2) + expect(tds[1]).toBe('Not') + } + // enterprise-server + { + const $ = await getDOM('/ja/enterprise-server@latest/get-started/foo/table-with-ifversions') + + const paragraph = $('#article-contents p').text() + expect(paragraph).toMatch('mention of GitHub Enterprise Server in Liquid') + + const tds = $('#article-contents td') + .map((i, element) => $(element).text()) + .get() + expect(tds.length).toBe(2) + expect(tds[1]).toBe('Present') + } + }) })