mirror of
https://github.com/freeCodeCamp/freeCodeCamp.git
synced 2025-12-19 10:07:46 -05:00
141 lines
3.7 KiB
JavaScript
141 lines
3.7 KiB
JavaScript
const mdastToHTML = require('./mdast-to-html');
|
|
|
|
// Captures hanzi (pinyin) pairs: group 1 is the run of hanzi, group 2 is the
// text between the ASCII parentheses; optional whitespace may separate them.
const HANZI_PINYIN_PAIR = '([\u4e00-\u9fff]+)\\s*\\(([^)]+)\\)';

// Matches the literal BLANK placeholder
const BLANK_TOKEN = 'BLANK';

// Matches runs of English (ASCII) and Chinese (CJK / fullwidth) punctuation.
// The non-ASCII marks are written as escapes so they cannot be silently folded
// into their ASCII look-alikes:
//   \u3002 ideographic full stop, \u3001 ideographic comma,
//   \uff0c \uff1f \uff01 \uff1b \uff1a = fullwidth , ? ! ; :
const PUNCTUATION = '[,?!;:\u3002\u3001\uff0c\uff1f\uff01\uff1b\uff1a]+';

// Matches Latin letters and whitespace (capture group 3 of the combined regex)
const OTHER_TEXT = '([a-zA-Z\\s]+)';

// Combined tokenizer. At each position the alternatives are tried in this
// order: hanzi (pinyin) pair, BLANK, punctuation, Latin text. Only a pair
// populates capture groups 1 and 2.
const HANZI_PINYIN_REGEX = new RegExp(
  `${HANZI_PINYIN_PAIR}|${BLANK_TOKEN}|${PUNCTUATION}|${OTHER_TEXT}`,
  'g'
);
|
|
|
|
/**
 * Collects every hanzi (pinyin) pair found in a piece of text.
 * Tokens that are not pairs (BLANK, punctuation, Latin text) are skipped.
 * @param {string} text - Text that may contain any number of hanzi (pinyin) patterns
 * @returns {Array<{hanzi: string, pinyin: string}>} The pairs, in order of appearance
 */
function parseHanziPinyinPairs(text) {
  // Scan with a private copy so the shared global regex's lastIndex is untouched.
  const scanner = new RegExp(HANZI_PINYIN_REGEX);
  const found = [];

  for (let hit = scanner.exec(text); hit !== null; hit = scanner.exec(text)) {
    const [, hanzi, pinyin] = hit;

    // Groups 1 and 2 are only both set when the pair alternative matched.
    if (!hanzi || !pinyin) {
      continue;
    }

    found.push({ hanzi: hanzi.trim(), pinyin: pinyin.trim() });
  }

  return found;
}
|
|
|
|
/**
 * Builds the hast <ruby> element for one hanzi (pinyin) pair.
 * The two <rp> children hold the literal parentheses around the <rt> text.
 * @param {string} hanzi - Base text; surrounding whitespace is trimmed
 * @param {string} pinyin - Annotation text for <rt>; surrounding whitespace is trimmed
 * @returns {object} Hast element node
 */
function createRubyElement(hanzi, pinyin) {
  const wrap = (tagName, value) => ({
    type: 'element',
    tagName,
    properties: {},
    children: [{ type: 'text', value }]
  });

  return {
    type: 'element',
    tagName: 'ruby',
    properties: {},
    children: [
      { type: 'text', value: hanzi.trim() },
      wrap('rp', '('),
      wrap('rt', pinyin.trim()),
      wrap('rp', ')')
    ]
  };
}

/**
 * Custom handler for Chinese inline code to render as ruby elements.
 * Hanzi-pinyin pairs become <ruby> elements; BLANK, punctuation and Latin
 * text become text nodes. Any characters the tokenizer does not recognise
 * (digits, hanzi without pinyin, other symbols) are kept as text nodes in
 * their original position instead of being dropped.
 * @param {object} state - The state object from mdast-util-to-hast (unused)
 * @param {object} node - The inlineCode node
 * @returns {object|Array<object>} A single hast node when exactly one node is
 *   produced, otherwise an array of nodes. Without any hanzi-pinyin pair the
 *   result is a span with class "highlighted-text".
 */
function chineseInlineCodeHandler(state, node) {
  const source = typeof node.value === 'string' ? node.value : '';
  const matches = [...source.matchAll(HANZI_PINYIN_REGEX)];
  const isPair = match => Boolean(match[1] && match[2]);

  // If static text (no pair anywhere), return the highlighted span
  if (!matches.some(isPair)) {
    return {
      type: 'element',
      tagName: 'span',
      properties: { className: 'highlighted-text' },
      children: [{ type: 'text', value: node.value }]
    };
  }

  const nodes = [];
  let cursor = 0;

  for (const match of matches) {
    // Preserve whatever sits between the previous token and this one.
    if (match.index > cursor) {
      nodes.push({ type: 'text', value: source.slice(cursor, match.index) });
    }

    // Other captures (BLANK, punctuation, other text including spaces)
    // are preserved exactly as matched.
    nodes.push(
      isPair(match)
        ? createRubyElement(match[1], match[2])
        : { type: 'text', value: match[0] }
    );

    cursor = match.index + match[0].length;
  }

  // Preserve unrecognised text after the last token.
  if (cursor < source.length) {
    nodes.push({ type: 'text', value: source.slice(cursor) });
  }

  return nodes.length === 1 ? nodes[0] : nodes;
}
|
|
|
|
/**
 * Custom handler that renders an inlineCode node as a <span> with the
 * "highlighted-text" class, wrapping the node's value as its only text child.
 * @param {object} state - The state object from mdast-util-to-hast (unused)
 * @param {object} node - The inlineCode node
 * @returns {object} Hast element node
 */
function spanInlineCodeHandler(state, node) {
  const { value } = node;
  const textChild = { type: 'text', value };

  return {
    type: 'element',
    tagName: 'span',
    properties: { className: 'highlighted-text' },
    children: [textChild]
  };
}
|
|
|
|
// Conversion options: inline code goes through the ruby-aware Chinese handler.
const spanOrRubyOptions = { handlers: { inlineCode: chineseInlineCodeHandler } };

// Conversion options: inline code is always rendered as a highlighted span.
const spanOptions = { handlers: { inlineCode: spanInlineCodeHandler } };
|
|
|
|
/**
 * Picks the mdast-to-HTML converter for a language.
 * @param {string} lang - Language code
 * @returns {Function} For 'zh-CN', a one-argument converter using the
 *   ruby-aware inline code options; for 'en-US' and 'es', a one-argument
 *   converter using the span options; otherwise mdastToHTML itself.
 */
const createMdastToHtml = lang => {
  switch (lang) {
    case 'zh-CN':
      return x => mdastToHTML(x, spanOrRubyOptions);
    case 'en-US':
    case 'es':
      return x => mdastToHTML(x, spanOptions);
    default:
      return mdastToHTML;
  }
};
|
|
|
|
// Public API: the pair parser and the per-language converter factory.
module.exports = { parseHanziPinyinPairs, createMdastToHtml };
|