Files
dify/api/tests/unit_tests/utils/test_text_processing.py
wangxiaolei d042cbc62e fix: fix remove_leading_symbols remove [ (#34832)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-04-09 08:22:09 +00:00

77 lines
3.0 KiB
Python

import pytest
from core.tools.utils.text_processing_utils import remove_leading_symbols
@pytest.mark.parametrize(
    ("input_text", "expected_output"),
    [
        # --- Basic leading punctuation -------------------------------------
        ("...Hello, World!", "Hello, World!"),
        ("。测试中文标点", "测试中文标点"),
        # Both `@` and `#` are leading symbols and are stripped.
        # NOTE: per the #11868 fix, `!` is reportedly excluded from the removal
        # pattern on purpose, which is why this input does not start with `!`.
        ("@#Test symbols", "Test symbols"),
        ("Hello, World!", "Hello, World!"),
        ("", ""),
        # A lone ASCII space is returned unchanged.
        (" ", " "),
        ("【测试】", "【测试】"),
        # --- Markdown links ------------------------------------------------
        # Text that starts with a markdown link is returned unchanged.
        ("[Google](https://google.com) is a search engine", "[Google](https://google.com) is a search engine"),
        ("[Example](http://example.com) some text", "[Example](http://example.com) some text"),
        # Symbols in front of a markdown link are removed, but the link's own
        # opening bracket `[` is kept so the link stays intact.
        ("@[Test](https://example.com)", "[Test](https://example.com)"),
        # --- Doubled ASCII symbols: only the leading run is removed --------
        ("~~标题~~", "标题~~"),
        ('""quoted', "quoted"),
        ("''test", "test"),
        ("##话题", "话题"),
        ("$$价格", "价格"),
        ("%%百分比", "百分比"),
        ("&&与逻辑", "与逻辑"),
        ("((括号))", "括号))"),
        ("**强调**", "强调**"),
        ("++自增", "自增"),
        (",,逗号", "逗号"),
        ("..省略", "省略"),
        ("//注释", "注释"),
        ("::范围", "范围"),
        (";;分号", "分号"),
        ("<<左移", "左移"),
        ("==等于", "等于"),
        (">>右移", "右移"),
        ("??疑问", "疑问"),
        ("@@提及", "提及"),
        ("^^上标", "上标"),
        ("__下划线", "下划线"),
        ("``代码", "代码"),
        ("~~删除线", "删除线"),
        # --- CJK punctuation -----------------------------------------------
        # Ideographic (full-width) space, written as an escape so it cannot be
        # confused with, or silently normalised to, an ASCII space.
        ("\u3000全角空格开头", "全角空格开头"),
        ("、顿号开头", "顿号开头"),
        ("。句号开头", "句号开头"),
        ("「引号」测试", "引号」测试"),
        ("『书名号』", "书名号』"),
        # A leading `【` is preserved, unlike the other CJK brackets below.
        ("【保留】测试", "【保留】测试"),
        ("〖括号〗测试", "括号〗测试"),
        ("〔括号〕测试", "括号〕测试"),
        # --- `[` / `【` stop the stripping and are themselves preserved ------
        ("~~【保留】~~", "【保留】~~"),
        ('"[公告]"', '[公告]"'),
        ("[公告] 更新", "[公告] 更新"),
        ("【通知】重要", "【通知】重要"),
        ("[[嵌套]]", "[[嵌套]]"),
        ("【【嵌套】】", "【【嵌套】】"),
        ("[【混合】]", "[【混合】]"),
        # --- No leading symbols: returned unchanged --------------------------
        ("normal text", "normal text"),
        ("123数字", "123数字"),
        ("中文开头", "中文开头"),
        ("alpha", "alpha"),
        # --- Symbol-only inputs ----------------------------------------------
        ("~", ""),
        ("[", "["),
        ("~~~", ""),
        ("【【【", "【【【"),
        # --- Leading whitespace control characters are left alone ------------
        ("\t制表符", "\t制表符"),
        ("\n换行", "\n换行"),
    ],
)
def test_remove_leading_symbols(input_text: str, expected_output: str) -> None:
    """Check that ``remove_leading_symbols`` returns the expected text for each case.

    Each parametrized pair is ``(input_text, expected_output)``. The cases pin
    which leading characters are stripped and which are preserved (notably
    ``[`` and ``【``), including when a markdown link starts the text.
    """
    assert remove_leading_symbols(input_text) == expected_output