/**
 * A lot of translations have minor corruptions that will lead to rendering
 * failing (and having to rely on English fallback). Many of these are
 * easy to manually correct for.
 *
 * This function is a temporary solution to correct for these corruptions.
 * It looks for easy "low hanging fruit" that we can correct for.
 *
 * @param {string} content - The translated content to correct.
 * @param {string} englishContent - The English content that the translated
 *   Liquid statements are compared against.
 * @param {boolean} [debug=false] - Currently not used by this function; kept
 *   so existing callers that pass it keep working.
 * @returns {string} The translated content with the known corruptions
 *   corrected. The `content` argument itself is never modified.
 */
export function correctTranslatedContentStrings(content, englishContent, debug = false) {
  // A lot of translations have corruptions around the AUTOTITLE links.
  // We've requested that these are corrected back but as a temporary
  // solution we'll manually recover now.
  // See internal issue #2762
  // In late 2023, search in the translations repos if these things are
  // still happening and if not, the following lines can be removed.
  //
  // The pairs are applied in this exact order.
  const autotitleFixes = [
    ['[AUTOTITLE"을 참조하세요]', '[AUTOTITLE]'],
    ['[AUTOTITLE"을]', '[AUTOTITLE]'],
    ['["AUTOTITLE]', '"[AUTOTITLE]'],
    ['[AUTOTITLE"을 참조하세요.](', '[AUTOTITLE]('],
  ]
  let corrected = content
  for (const [corrupted, repaired] of autotitleFixes) {
    corrected = corrected.replaceAll(corrupted, repaired)
  }

  // A lot of Liquid tags lose their linebreak after the `}` which can
  // result in formatting problems, especially around Markdown tables.
  // This code here compares each Liquid statement in the translation
  // and tests if it appears like that but with a newline in the English.
  // English example:
  //
  //    {%- ifversion ghes %}
  //    | Thing | ✔️ |
  //    {%- endif %}
  //
  // Translation example:
  //
  //    {%- ifversion ghes %} | Thing | ✔️ | {%- endif %}
  //
  // There exists the risk that Liquid statements in the translation get
  // compared to different Liquid statements in the English, but the risk is
  // worth taking because even if this accidentally introduces a newline, it's
  // unlikely to cause a problem. At worst a sentence displays in its
  // own paragraph.
  corrected = corrected.replace(/\{%(.+?)%\} /g, (match) => {
    if (match.lastIndexOf('{%') > 0) {
      // For example:
      //
      //    `{% bla bla %}, and {% foo bar %} `
      //
      // Our regex is not greedy, but technically, if you look closely
      // you'll see this is the first match that starts with `{%` and
      // ends with `%} `. Let's skip these.
      return match
    }

    // Swap the trailing space for a linebreak, but only if the English has
    // the linebreak form and does *not* also have the trailing-space form.
    const withLinebreak = `${match.slice(0, -1)}\n`
    if (englishContent.includes(withLinebreak) && !englishContent.includes(match)) {
      return withLinebreak
    }
    return match
  })

  // The above corrections depend on looking for `{% foo %} ` and replacing
  // it with `{% foo %}\n`. ...if `{% foo %}\n` was in the English
  // content and `{% foo %} ` was *not*.
  // However we see a lot of cases of this:
  //
  //    ... {% endif %} | First Column ...
  //
  // Which needs to become this:
  //
  //    ... {% endif %}
  //    | First Column ...
  //
  // And since `{% endif %}` is such a common Liquid tag we can't rely
  // on looking for it with `{% endif %}\n` in the English content.
  corrected = corrected.replace(/\{% endif %\} \| /g, (match) => {
    const potentiallyBetter = '{% endif %}\n| '
    if (englishContent.includes(potentiallyBetter)) {
      return potentiallyBetter
    }
    return match
  })

  // All too often we see translations that look like this:
  //
  //   | Qualifizierer | Beschreibung | | -------- | -------- | {% ifversion ghec or ghes > 3.8 %} | `advanced-security:enabled` | Zeigt Repositorys an, für die {% data variables.product.prodname_GH_advanced_security %} aktiviert wurde | {% endif %} | `code-scanning-pull-request-alerts:enabled`| Zeigt Repositorys an, für die die {% data variables.product.prodname_code_scanning %} zur Ausführung bei Pull Requests konfiguriert wurde | | `dependabot-security-updates:enabled` | Zeigt Repositorys an, für die {% data variables.product.prodname_dependabot %}-Sicherheitsupdates aktiviert wurden | | `secret-scanning-push-protection:enabled` | Zeigt Repositorys an, für die der Pushschutz für die {% data variables.product.prodname_secret_scanning %} aktiviert wurde | {% endif %}
  //
  // Yes, that's one very long line. Notice how all the necessary linebreaks
  // are suddenly gone.
  corrected = corrected.replaceAll(' | | ', ' |\n| ')

  return corrected
}