Files
freeCodeCamp/tools/challenge-parser/parser/plugins/utils/i18n-stringify.js

141 lines
3.7 KiB
JavaScript

const mdastToHTML = require('./mdast-to-html');
// Source fragments for the inline-code tokenizer. They are kept as strings so
// they can be joined into one alternation below.

// Captures a hanzi (pinyin) pair: group 1 is a run of hanzi, followed by
// optional whitespace, then group 2 is the pinyin inside ASCII parentheses.
const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';
// Matches the BLANK placeholder
const BLANK_TOKEN = 'BLANK';
// Matches runs of Chinese and English punctuation.
// Non-ASCII members are written as escapes so that editors or tooling that
// normalise full-width characters to their ASCII look-alikes cannot silently
// collapse the class:
//   U+3002 ideographic full stop, U+3001 ideographic comma,
//   U+FF0C / U+FF1F / U+FF01 / U+FF1B / U+FF1A full-width , ? ! ; :
const PUNCTUATION =
  '[,\u3002?!\uFF01\uFF1F\uFF0C;:\uFF1B\uFF1A\u3001]+';
// Matches Latin text with spaces (group 3)
const OTHER_TEXT = '([a-zA-Z\\s]+)';
// Alternation order matters: a pair is tried first, then BLANK, so that
// "BLANK" at a token boundary is not swallowed by the Latin-text branch.
const HANZI_PINYIN_REGEX = new RegExp(
  `${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
  'g'
);
/**
 * Extracts every hanzi (pinyin) pair found in a piece of text.
 * Tokens that are not pairs (BLANK, punctuation, Latin text) are scanned
 * over but never reported.
 * @param {string} text - Text that may contain any number of hanzi (pinyin) patterns
 * @returns {Array<{hanzi: string, pinyin: string}>} Pairs in order of appearance, each side trimmed
 */
function parseHanziPinyinPairs(text) {
  // Scan with a private copy so the shared global regex's lastIndex is
  // never advanced by this function.
  const scanner = new RegExp(HANZI_PINYIN_REGEX);
  const found = [];

  for (let hit = scanner.exec(text); hit !== null; hit = scanner.exec(text)) {
    const [, hanzi, pinyin] = hit;
    if (!hanzi || !pinyin) {
      continue;
    }
    found.push({ hanzi: hanzi.trim(), pinyin: pinyin.trim() });
  }

  return found;
}
/**
 * Builds the hast <ruby> element for one hanzi (pinyin) pair.
 * The <rp> children carry literal parentheses around the <rt> annotation.
 * @param {string} hanzi - Base text
 * @param {string} pinyin - Annotation text
 * @returns {object} Hast element node
 */
function buildRubyElement(hanzi, pinyin) {
  return {
    type: 'element',
    tagName: 'ruby',
    properties: {},
    children: [
      { type: 'text', value: hanzi },
      {
        type: 'element',
        tagName: 'rp',
        properties: {},
        children: [{ type: 'text', value: '(' }]
      },
      {
        type: 'element',
        tagName: 'rt',
        properties: {},
        children: [{ type: 'text', value: pinyin }]
      },
      {
        type: 'element',
        tagName: 'rp',
        properties: {},
        children: [{ type: 'text', value: ')' }]
      }
    ]
  };
}

/**
 * Custom handler for Chinese inline code to render as ruby elements.
 *
 * When the value contains at least one hanzi (pinyin) pair, each pair becomes
 * a <ruby> element and everything else is emitted as text nodes in source
 * order. Text that the tokenizer regex does not recognise (digits, "." and
 * other symbols, hanzi without a pinyin group) is kept as text as well, so no
 * part of the original value is lost.
 *
 * When the value contains no pair, it is wrapped in a highlighted span.
 *
 * @param {object} state - The state object from mdast-util-to-hast (unused)
 * @param {object} node - The inlineCode node
 * @returns {object|Array<object>} Hast element node or array of nodes
 */
function chineseInlineCodeHandler(state, node) {
  const { value } = node;
  // Tokenise once; a non-string value cannot hold pairs and falls through to
  // the span branch below.
  const tokens =
    typeof value === 'string' ? [...value.matchAll(HANZI_PINYIN_REGEX)] : [];
  const hasRubyPair = tokens.some(token => token[1] && token[2]);

  // If static text, return a highlighted span
  if (!hasRubyPair) {
    return {
      type: 'element',
      tagName: 'span',
      properties: { className: 'highlighted-text' },
      children: [{ type: 'text', value }]
    };
  }

  const nodes = [];
  // Index just past the last character already emitted.
  let cursor = 0;

  for (const token of tokens) {
    // Characters skipped by the regex between two tokens would otherwise
    // vanish from the output; emit them verbatim.
    if (token.index > cursor) {
      nodes.push({ type: 'text', value: value.slice(cursor, token.index) });
    }
    cursor = token.index + token[0].length;

    if (token[1] && token[2]) {
      nodes.push(buildRubyElement(token[1].trim(), token[2].trim()));
    } else {
      // Other captures (BLANK, punctuation, other text including spaces)
      // are preserved exactly.
      nodes.push({ type: 'text', value: token[0] });
    }
  }

  // Unrecognised text after the final token.
  if (cursor < value.length) {
    nodes.push({ type: 'text', value: value.slice(cursor) });
  }

  return nodes.length === 1 ? nodes[0] : nodes;
}
/**
 * Inline-code handler that wraps the code's text in a highlighted span.
 * @param {object} state - The state object from mdast-util-to-hast (unused)
 * @param {object} node - The inlineCode node; only its `value` is read
 * @returns {object} Hast element node
 */
function spanInlineCodeHandler(state, node) {
  const textChild = { type: 'text', value: node.value };
  const highlightedSpan = {
    type: 'element',
    tagName: 'span',
    properties: { className: 'highlighted-text' },
    children: [textChild]
  };
  return highlightedSpan;
}
// Conversion options whose inlineCode handler emits ruby markup for
// hanzi (pinyin) pairs and a highlighted span otherwise.
const spanOrRubyOptions = { handlers: { inlineCode: chineseInlineCodeHandler } };

// Conversion options whose inlineCode handler always emits a highlighted span.
const spanOptions = { handlers: { inlineCode: spanInlineCodeHandler } };
/**
 * Picks the mdast-to-HTML converter for a language.
 * - 'zh-CN' converts with the span-or-ruby inline code options.
 * - 'en-US' and 'es' convert with the span inline code options.
 * - Any other value yields the plain `mdastToHTML` function itself.
 * @param {string} lang - Language code
 * @returns {Function} Converter taking an mdast input
 */
const createMdastToHtml = lang => {
  switch (lang) {
    case 'zh-CN':
      return x => mdastToHTML(x, spanOrRubyOptions);
    case 'en-US':
    case 'es':
      return x => mdastToHTML(x, spanOptions);
    default:
      return mdastToHTML;
  }
};
// Public API: the hanzi (pinyin) pair parser and the per-language converter
// factory. The inline code handlers and option objects stay module-private.
module.exports = {
parseHanziPinyinPairs,
createMdastToHtml
};