feat(client,challenge-parser): render Chinese as ruby markup (#63424)

Co-authored-by: Oliver Eyton-Williams <ojeytonwilliams@gmail.com>
This commit is contained in:
Huyen Nguyen
2025-11-10 11:25:57 -08:00
committed by GitHub
parent 14dd3c6b27
commit 0c844ab007
13 changed files with 601 additions and 128 deletions

View File

@@ -0,0 +1,78 @@
const mdastToHTML = require('./mdast-to-html');
/**
 * Splits text written as `hanzi (pinyin)` into its two parts.
 *
 * Whitespace between the hanzi and the opening parenthesis is optional, both
 * parts are trimmed, and the closing parenthesis must be the last character.
 *
 * @param {string} text - Text in format: hanzi (pinyin)
 * @returns {{ hanzi: string, pinyin: string } | null} Parsed hanzi and pinyin,
 * or null if the text is not a string, does not match the format, or either
 * part is empty once trimmed.
 */
function parseChinesePattern(text) {
  if (typeof text !== 'string') {
    return null;
  }
  const match = text.match(/^(.+?)\s*\((.+?)\)$/);
  if (!match) {
    return null;
  }
  const hanzi = match[1].trim();
  const pinyin = match[2].trim();
  // `.+?` also matches whitespace, so inputs such as " (x)" or "x ( )" satisfy
  // the regex but leave an empty part after trimming. Treat those as
  // non-matching rather than returning an empty hanzi or pinyin.
  if (hanzi === '' || pinyin === '') {
    return null;
  }
  return { hanzi, pinyin };
}
/**
 * Handler for inlineCode nodes in Chinese content.
 *
 * When the node's value parses as `hanzi (pinyin)`, the result is a <ruby>
 * element: the hanzi as base text, the pinyin in <rt>, and the parentheses
 * wrapped in <rp> elements. Any other value is emitted as a <code> element
 * containing the raw value.
 *
 * @param {object} state - The state object from mdast-util-to-hast (unused)
 * @param {object} node - The inlineCode node
 * @returns {object} Hast element node
 */
function chineseInlineCodeHandler(state, node) {
  const text = value => ({ type: 'text', value });
  const element = (tagName, children) => ({
    type: 'element',
    tagName,
    properties: {},
    children
  });

  const parsed = parseChinesePattern(node.value);
  if (!parsed) {
    // TODO: change this to span
    // https://github.com/freeCodeCamp/language-curricula/issues/22
    return element('code', [text(node.value)]);
  }

  const { hanzi, pinyin } = parsed;
  return element('ruby', [
    text(hanzi),
    element('rp', [text('(')]),
    element('rt', [text(pinyin)]),
    element('rp', [text(')')])
  ]);
}
// Options handed to mdastToHTML for zh-CN content: inlineCode nodes are routed
// through chineseInlineCodeHandler.
const rubyOptions = { handlers: { inlineCode: chineseInlineCodeHandler } };
/**
 * Picks the mdast-to-HTML converter for a language.
 *
 * For exactly 'zh-CN' the returned function calls mdastToHTML with
 * rubyOptions; for every other value mdastToHTML itself is returned. The
 * comparison is strict, so values that merely coerce to 'zh-CN' (for example
 * a one-element array) do not enable the ruby handler.
 *
 * @param {string} lang - Language code
 * @returns {(nodes: object[]) => string} Converter from mdast nodes to HTML
 */
const createMdastToHtml = lang =>
  lang === 'zh-CN' ? nodes => mdastToHTML(nodes, rubyOptions) : mdastToHTML;
module.exports = { parseChinesePattern, createMdastToHtml };

View File

@@ -0,0 +1,129 @@
import { describe, it, expect } from 'vitest';
import { createMdastToHtml, parseChinesePattern } from './i18n-stringify';
// Unit tests for parseChinesePattern: accepted `hanzi (pinyin)` spellings and
// the inputs that must yield null.
describe('parseChinesePattern', () => {
  it('should parse Chinese text with hanzi and pinyin', () => {
    const result = parseChinesePattern('你好 (nǐ hǎo)');
    expect(result).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });
  it('should handle text without spaces before parentheses', () => {
    const result = parseChinesePattern('你好(nǐ hǎo)');
    expect(result).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });
  it('should handle text with multiple spaces', () => {
    // Several spaces before the parenthesis, so this case differs from the
    // single-space case above.
    const result = parseChinesePattern('你好   (nǐ hǎo)');
    expect(result).toEqual({
      hanzi: '你好',
      pinyin: 'nǐ hǎo'
    });
  });
  it('should return null for text without parentheses', () => {
    const result = parseChinesePattern('你好');
    expect(result).toBeNull();
  });
  it('should return null for text with only opening parenthesis', () => {
    const result = parseChinesePattern('你好 (nǐ hǎo');
    expect(result).toBeNull();
  });
  it('should return null for empty string', () => {
    const result = parseChinesePattern('');
    expect(result).toBeNull();
  });
});
// Tests for createMdastToHtml: ruby rendering for zh-CN, the <code> fallback
// when the value is not `hanzi (pinyin)`, and plain rendering for other
// language codes.
describe('createMdastToHtml', () => {
  it('should render Chinese inline code as ruby when lang is zh-CN', () => {
    const toHtml = createMdastToHtml('zh-CN');
    const nodes = [
      {
        type: 'paragraph',
        children: [
          { type: 'text', value: 'This is ' },
          { type: 'inlineCode', value: '请问 (qǐng wèn)' },
          { type: 'text', value: '.' }
        ]
      }
    ];
    const actual = toHtml(nodes);
    expect(actual).toBe(
      '<p>This is <ruby>请问<rp>(</rp><rt>qǐng wèn</rt><rp>)</rp></ruby>.</p>'
    );
  });
  it('should render Chinese inline code as ruby with or without space before parenthesis', () => {
    const toHtml = createMdastToHtml('zh-CN');
    const nodesWithSpace = [
      {
        type: 'paragraph',
        children: [{ type: 'inlineCode', value: '你好 (nǐ hǎo)' }]
      }
    ];
    const nodesWithoutSpace = [
      {
        type: 'paragraph',
        children: [{ type: 'inlineCode', value: '你好(nǐ hǎo)' }]
      }
    ];
    const expected =
      '<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby></p>';
    expect(toHtml(nodesWithSpace)).toBe(expected);
    expect(toHtml(nodesWithoutSpace)).toBe(expected);
  });
  it('should handle multiple Chinese inline codes in one paragraph', () => {
    const toHtml = createMdastToHtml('zh-CN');
    const nodes = [
      {
        type: 'paragraph',
        children: [
          { type: 'inlineCode', value: '你好 (nǐ hǎo)' },
          { type: 'text', value: ' and ' },
          { type: 'inlineCode', value: '再见 (zài jiàn)' }
        ]
      }
    ];
    const actual = toHtml(nodes);
    expect(actual).toBe(
      '<p><ruby>你好<rp>(</rp><rt>nǐ hǎo</rt><rp>)</rp></ruby> and <ruby>再见<rp>(</rp><rt>zài jiàn</rt><rp>)</rp></ruby></p>'
    );
  });
  it('should fallback to code element if pattern does not match', () => {
    const toHtml = createMdastToHtml('zh-CN');
    const nodes = [
      {
        type: 'paragraph',
        children: [
          { type: 'inlineCode', value: '你好' },
          { type: 'text', value: ' and ' },
          { type: 'inlineCode', value: 'nǐ hǎo' }
        ]
      }
    ];
    // The language is fixed by createMdastToHtml; the converter takes only
    // the nodes.
    const actual = toHtml(nodes);
    expect(actual).toBe('<p><code>你好</code> and <code>nǐ hǎo</code></p>');
  });
  it('should render as regular code when lang is not zh-CN', () => {
    const toHtml = createMdastToHtml('zh');
    const nodes = [
      {
        type: 'paragraph',
        children: [{ type: 'inlineCode', value: '请问 (qǐng wèn)' }]
      }
    ];
    const actual = toHtml(nodes);
    expect(actual).toBe('<p><code>请问 (qǐng wèn)</code></p>');
  });
});

View File

@@ -2,14 +2,17 @@ const hastToHTML = require('hast-util-to-html');
const { root } = require('mdast-builder');
const mdastToHast = require('mdast-util-to-hast');
// Converts an array of mdast nodes to HTML by wrapping them in a root node,
// converting to hast with mdastToHast, and serialising with hastToHTML.
// Throws an Error when `nodes` is not an array.
// NOTE(review): this hunk appears to interleave the removed and added lines of
// a diff (two signatures, two return statements) — confirm which lines are
// current before editing.
function mdastToHTML(nodes) {
// hastOptions (default: empty object) is spread into the options passed to
// mdastToHast below.
function mdastToHTML(nodes, hastOptions = {}) {
if (!Array.isArray(nodes))
throw Error('mdastToHTML expects an array argument');
// - the 'nodes' are children, so first need embedding in a parent
return hastToHTML(mdastToHast(root(nodes), { allowDangerousHtml: true }), {
allowDangerousHtml: true
});
return hastToHTML(
// hastOptions is spread after allowDangerousHtml, so a caller-supplied key of
// the same name takes precedence for the mdast -> hast step.
mdastToHast(root(nodes), { allowDangerousHtml: true, ...hastOptions }),
{
allowDangerousHtml: true
}
);
}
module.exports = mdastToHTML;