mirror of
https://github.com/langgenius/dify.git
synced 2026-03-22 10:04:04 -04:00
perf: tidb_on_qdrant_vector delete_by_ids use batch delete (#33846)
This commit is contained in:
@@ -284,27 +284,29 @@ class TidbOnQdrantVector(BaseVector):
|
||||
from qdrant_client.http import models
|
||||
from qdrant_client.http.exceptions import UnexpectedResponse
|
||||
|
||||
for node_id in ids:
|
||||
try:
|
||||
filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.doc_id",
|
||||
match=models.MatchValue(value=node_id),
|
||||
),
|
||||
],
|
||||
)
|
||||
self._client.delete(
|
||||
collection_name=self._collection_name,
|
||||
points_selector=FilterSelector(filter=filter),
|
||||
)
|
||||
except UnexpectedResponse as e:
|
||||
# Collection does not exist, so return
|
||||
if e.status_code == 404:
|
||||
return
|
||||
# Some other error occurred, so re-raise the exception
|
||||
else:
|
||||
raise e
|
||||
if not ids:
|
||||
return
|
||||
|
||||
try:
|
||||
filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.doc_id",
|
||||
match=models.MatchAny(any=ids),
|
||||
),
|
||||
],
|
||||
)
|
||||
self._client.delete(
|
||||
collection_name=self._collection_name,
|
||||
points_selector=FilterSelector(filter=filter),
|
||||
)
|
||||
except UnexpectedResponse as e:
|
||||
# Collection does not exist, so return
|
||||
if e.status_code == 404:
|
||||
return
|
||||
# Some other error occurred, so re-raise the exception
|
||||
else:
|
||||
raise e
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
all_collection_name = []
|
||||
|
||||
@@ -0,0 +1,160 @@
|
||||
from unittest.mock import patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from qdrant_client.http import models as rest
|
||||
from qdrant_client.http.exceptions import UnexpectedResponse
|
||||
|
||||
from core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector import (
|
||||
TidbOnQdrantConfig,
|
||||
TidbOnQdrantVector,
|
||||
)
|
||||
|
||||
|
||||
class TestTidbOnQdrantVectorDeleteByIds:
|
||||
"""Unit tests for TidbOnQdrantVector.delete_by_ids method."""
|
||||
|
||||
@pytest.fixture
|
||||
def vector_instance(self):
|
||||
"""Create a TidbOnQdrantVector instance for testing."""
|
||||
config = TidbOnQdrantConfig(
|
||||
endpoint="http://localhost:6333",
|
||||
api_key="test_api_key",
|
||||
)
|
||||
|
||||
with patch("core.rag.datasource.vdb.tidb_on_qdrant.tidb_on_qdrant_vector.qdrant_client.QdrantClient"):
|
||||
vector = TidbOnQdrantVector(
|
||||
collection_name="test_collection",
|
||||
group_id="test_group",
|
||||
config=config,
|
||||
)
|
||||
return vector
|
||||
|
||||
def test_delete_by_ids_with_multiple_ids(self, vector_instance):
|
||||
"""Test batch deletion with multiple document IDs."""
|
||||
ids = ["doc1", "doc2", "doc3"]
|
||||
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
# Verify that delete was called once with MatchAny filter
|
||||
vector_instance._client.delete.assert_called_once()
|
||||
call_args = vector_instance._client.delete.call_args
|
||||
|
||||
# Check collection name
|
||||
assert call_args[1]["collection_name"] == "test_collection"
|
||||
|
||||
# Verify filter uses MatchAny with all IDs
|
||||
filter_selector = call_args[1]["points_selector"]
|
||||
filter_obj = filter_selector.filter
|
||||
assert len(filter_obj.must) == 1
|
||||
|
||||
field_condition = filter_obj.must[0]
|
||||
assert field_condition.key == "metadata.doc_id"
|
||||
assert isinstance(field_condition.match, rest.MatchAny)
|
||||
assert set(field_condition.match.any) == {"doc1", "doc2", "doc3"}
|
||||
|
||||
def test_delete_by_ids_with_single_id(self, vector_instance):
|
||||
"""Test deletion with a single document ID."""
|
||||
ids = ["doc1"]
|
||||
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
# Verify that delete was called once
|
||||
vector_instance._client.delete.assert_called_once()
|
||||
call_args = vector_instance._client.delete.call_args
|
||||
|
||||
# Verify filter uses MatchAny with single ID
|
||||
filter_selector = call_args[1]["points_selector"]
|
||||
filter_obj = filter_selector.filter
|
||||
field_condition = filter_obj.must[0]
|
||||
assert isinstance(field_condition.match, rest.MatchAny)
|
||||
assert field_condition.match.any == ["doc1"]
|
||||
|
||||
def test_delete_by_ids_with_empty_list(self, vector_instance):
|
||||
"""Test deletion with empty ID list returns early without API call."""
|
||||
vector_instance.delete_by_ids([])
|
||||
|
||||
# Verify that delete was NOT called
|
||||
vector_instance._client.delete.assert_not_called()
|
||||
|
||||
def test_delete_by_ids_with_404_error(self, vector_instance):
|
||||
"""Test that 404 errors (collection not found) are handled gracefully."""
|
||||
ids = ["doc1", "doc2"]
|
||||
|
||||
# Mock a 404 error
|
||||
error = UnexpectedResponse(
|
||||
status_code=404,
|
||||
reason_phrase="Not Found",
|
||||
content=b"Collection not found",
|
||||
headers=httpx.Headers(),
|
||||
)
|
||||
vector_instance._client.delete.side_effect = error
|
||||
|
||||
# Should not raise an exception
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
# Verify delete was called
|
||||
vector_instance._client.delete.assert_called_once()
|
||||
|
||||
def test_delete_by_ids_with_unexpected_error(self, vector_instance):
|
||||
"""Test that non-404 errors are re-raised."""
|
||||
ids = ["doc1", "doc2"]
|
||||
|
||||
# Mock a 500 error
|
||||
error = UnexpectedResponse(
|
||||
status_code=500,
|
||||
reason_phrase="Internal Server Error",
|
||||
content=b"Server error",
|
||||
headers=httpx.Headers(),
|
||||
)
|
||||
vector_instance._client.delete.side_effect = error
|
||||
|
||||
# Should re-raise the exception
|
||||
with pytest.raises(UnexpectedResponse) as exc_info:
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
assert exc_info.value.status_code == 500
|
||||
|
||||
def test_delete_by_ids_with_large_batch(self, vector_instance):
|
||||
"""Test deletion with a large batch of IDs."""
|
||||
# Create 1000 IDs
|
||||
ids = [f"doc_{i}" for i in range(1000)]
|
||||
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
# Verify single delete call with all IDs
|
||||
vector_instance._client.delete.assert_called_once()
|
||||
call_args = vector_instance._client.delete.call_args
|
||||
|
||||
filter_selector = call_args[1]["points_selector"]
|
||||
filter_obj = filter_selector.filter
|
||||
field_condition = filter_obj.must[0]
|
||||
|
||||
# Verify all 1000 IDs are in the batch
|
||||
assert len(field_condition.match.any) == 1000
|
||||
assert "doc_0" in field_condition.match.any
|
||||
assert "doc_999" in field_condition.match.any
|
||||
|
||||
def test_delete_by_ids_filter_structure(self, vector_instance):
|
||||
"""Test that the filter structure is correctly constructed."""
|
||||
ids = ["doc1", "doc2"]
|
||||
|
||||
vector_instance.delete_by_ids(ids)
|
||||
|
||||
call_args = vector_instance._client.delete.call_args
|
||||
filter_selector = call_args[1]["points_selector"]
|
||||
filter_obj = filter_selector.filter
|
||||
|
||||
# Verify Filter structure
|
||||
assert isinstance(filter_obj, rest.Filter)
|
||||
assert filter_obj.must is not None
|
||||
assert len(filter_obj.must) == 1
|
||||
|
||||
# Verify FieldCondition structure
|
||||
field_condition = filter_obj.must[0]
|
||||
assert isinstance(field_condition, rest.FieldCondition)
|
||||
assert field_condition.key == "metadata.doc_id"
|
||||
|
||||
# Verify MatchAny structure
|
||||
assert isinstance(field_condition.match, rest.MatchAny)
|
||||
assert field_condition.match.any == ids
|
||||
Reference in New Issue
Block a user