mirror of
https://github.com/langgenius/dify.git
synced 2026-04-11 06:00:28 -04:00
77 lines
3.0 KiB
Python
77 lines
3.0 KiB
Python
import pytest
|
|
|
|
from core.tools.utils.text_processing_utils import remove_leading_symbols
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("input_text", "expected_output"),
    [
        ("...Hello, World!", "Hello, World!"),
        ("。测试中文标点", "测试中文标点"),
        # Both leading symbols "@" and "#" are stripped.
        # NOTE(review): an earlier note here said "!" is intentionally excluded from the
        # removal pattern (#11868); this case contains no "!" and does not exercise that.
        ("@#Test symbols", "Test symbols"),
        ("Hello, World!", "Hello, World!"),
        ("", ""),
        (" ", " "),
        ("【测试】", "【测试】"),
        # Markdown link preservation - text that starts with a markdown link is returned unchanged
        ("[Google](https://google.com) is a search engine", "[Google](https://google.com) is a search engine"),
        ("[Example](http://example.com) some text", "[Example](http://example.com) some text"),
        # Leading symbols before a markdown link are removed; the link itself,
        # including its opening bracket "[", is kept.
        ("@[Test](https://example.com)", "[Test](https://example.com)"),
        # Only the leading run is stripped; trailing symbols stay in place.
        ("~~标题~~", "标题~~"),
        ('""quoted', "quoted"),
        ("''test", "test"),
        ("##话题", "话题"),
        ("$$价格", "价格"),
        ("%%百分比", "百分比"),
        ("&&与逻辑", "与逻辑"),
        ("((括号))", "括号))"),
        ("**强调**", "强调**"),
        ("++自增", "自增"),
        (",,逗号", "逗号"),
        ("..省略", "省略"),
        ("//注释", "注释"),
        ("::范围", "范围"),
        (";;分号", "分号"),
        ("<<左移", "左移"),
        ("==等于", "等于"),
        (">>右移", "右移"),
        ("??疑问", "疑问"),
        ("@@提及", "提及"),
        ("^^上标", "上标"),
        ("__下划线", "下划线"),
        ("``代码", "代码"),
        ("~~删除线", "删除线"),
        # Full-width (ideographic) space, written as an escape so the invisible
        # character cannot be silently normalised to an ASCII space, which is
        # expected to be preserved (see the " " case above).
        ("\u3000全角空格开头", "全角空格开头"),
        ("、顿号开头", "顿号开头"),
        ("。句号开头", "句号开头"),
        ("「引号」测试", "引号」测试"),
        ("『书名号』", "书名号』"),
        # Leading "【" and "[" are expected to be preserved, alone or nested.
        ("【保留】测试", "【保留】测试"),
        ("〖括号〗测试", "括号〗测试"),
        ("〔括号〕测试", "括号〕测试"),
        ("~~【保留】~~", "【保留】~~"),
        ('"[公告]"', '[公告]"'),
        ("[公告] 更新", "[公告] 更新"),
        ("【通知】重要", "【通知】重要"),
        ("[[嵌套]]", "[[嵌套]]"),
        ("【【嵌套】】", "【【嵌套】】"),
        ("[【混合】]", "[【混合】]"),
        # Text that starts with a letter, digit or CJK character is unchanged.
        ("normal text", "normal text"),
        ("123数字", "123数字"),
        ("中文开头", "中文开头"),
        ("alpha", "alpha"),
        # Inputs made up entirely of one repeated symbol.
        ("~", ""),
        ("【", "【"),
        ("[", "["),
        ("~~~", ""),
        ("【【【", "【【【"),
        # Leading ASCII control whitespace is expected to be preserved.
        ("\t制表符", "\t制表符"),
        ("\n换行", "\n换行"),
    ],
)
def test_remove_leading_symbols(input_text, expected_output):
    """Check that ``remove_leading_symbols`` maps each input to its expected output.

    Each parametrized case pairs a raw string with the exact string the
    function is expected to return for it.
    """
    assert remove_leading_symbols(input_text) == expected_output
|