"""Parametrized tests for ``remove_leading_symbols``."""

import pytest

from core.tools.utils.text_processing_utils import remove_leading_symbols


@pytest.mark.parametrize(
    ("input_text", "expected_output"),
    [
        # Basic leading punctuation, ASCII and CJK.
        ("...Hello, World!", "Hello, World!"),
        ("。测试中文标点", "测试中文标点"),
        # Both leading symbols, @ and #, are stripped here.
        # An earlier note on this case said "!" is intentionally excluded from the
        # removal pattern (#11868 fix); this input contains no "!", so that
        # exclusion is not exercised by this case.
        ("@#Test symbols", "Test symbols"),
        # Inputs expected to come back unchanged.
        ("Hello, World!", "Hello, World!"),
        ("", ""),
        (" ", " "),
        ("【测试】", "【测试】"),
        # Markdown link preservation - should be preserved if text starts with a markdown link
        ("[Google](https://google.com) is a search engine", "[Google](https://google.com) is a search engine"),
        ("[Example](http://example.com) some text", "[Example](http://example.com) some text"),
        # A leading symbol before a markdown link is removed; the link itself,
        # including its opening bracket [, is kept.
        ("@[Test](https://example.com)", "[Test](https://example.com)"),
        # Only the leading run is stripped; trailing symbols stay in place.
        ("~~标题~~", "标题~~"),
        # Doubled ASCII symbols at the start of the text.
        ('""quoted', "quoted"),
        ("''test", "test"),
        ("##话题", "话题"),
        ("$$价格", "价格"),
        ("%%百分比", "百分比"),
        ("&&与逻辑", "与逻辑"),
        ("((括号))", "括号))"),
        ("**强调**", "强调**"),
        ("++自增", "自增"),
        (",,逗号", "逗号"),
        ("..省略", "省略"),
        ("//注释", "注释"),
        ("::范围", "范围"),
        (";;分号", "分号"),
        ("<<左移", "左移"),
        ("==等于", "等于"),
        (">>右移", "右移"),
        ("??疑问", "疑问"),
        ("@@提及", "提及"),
        ("^^上标", "上标"),
        ("__下划线", "下划线"),
        ("``代码", "代码"),
        ("~~删除线", "删除线"),
        # Leading ideographic (full-width) space, written as an escape so the
        # invisible character cannot be mistaken for an ASCII space.
        ("\u3000全角空格开头", "全角空格开头"),
        # Leading CJK punctuation is stripped, except 【 which is kept.
        ("、顿号开头", "顿号开头"),
        ("。句号开头", "句号开头"),
        ("「引号」测试", "引号」测试"),
        ("『书名号』", "书名号』"),
        ("【保留】测试", "【保留】测试"),
        ("〖括号〗测试", "括号〗测试"),
        ("〔括号〕测试", "括号〕测试"),
        # Stripping stops at the first [ or 【.
        ("~~【保留】~~", "【保留】~~"),
        ('"[公告]"', '[公告]"'),
        # Text starting with [ or 【 is returned unchanged, nested or mixed.
        ("[公告] 更新", "[公告] 更新"),
        ("【通知】重要", "【通知】重要"),
        ("[[嵌套]]", "[[嵌套]]"),
        ("【【嵌套】】", "【【嵌套】】"),
        ("[【混合】]", "[【混合】]"),
        # Text starting with a letter, digit or CJK character is unchanged.
        ("normal text", "normal text"),
        ("123数字", "123数字"),
        ("中文开头", "中文开头"),
        ("alpha", "alpha"),
        # Inputs made up of symbols only.
        ("~", ""),
        ("【", "【"),
        ("[", "["),
        ("~~~", ""),
        ("【【【", "【【【"),
        # Leading tab and newline are not stripped.
        ("\t制表符", "\t制表符"),
        ("\n换行", "\n换行"),
    ],
)
def test_remove_leading_symbols(input_text, expected_output):
    """Check that ``remove_leading_symbols`` returns the expected text for each case."""
    assert remove_leading_symbols(input_text) == expected_output