refactor: core/rag docstore, datasource, embedding, rerank, retrieval (#34203)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
This commit is contained in:
Renzo
2026-03-30 10:09:49 +02:00
committed by GitHub
parent 40fa0f365c
commit 456684dfc3
24 changed files with 170 additions and 214 deletions

View File

@@ -97,13 +97,13 @@ class Jieba(BaseKeyword):
documents = []
-segment_query_stmt = db.session.query(DocumentSegment).where(
+segment_query_stmt = select(DocumentSegment).where(
DocumentSegment.dataset_id == self.dataset.id, DocumentSegment.index_node_id.in_(sorted_chunk_indices)
)
if document_ids_filter:
segment_query_stmt = segment_query_stmt.where(DocumentSegment.document_id.in_(document_ids_filter))
-segments = db.session.execute(segment_query_stmt).scalars().all()
+segments = db.session.scalars(segment_query_stmt).all()
segment_map = {segment.index_node_id: segment for segment in segments}
for chunk_index in sorted_chunk_indices:
segment = segment_map.get(chunk_index)

View File

@@ -432,10 +432,11 @@ class RetrievalService:
# Batch query dataset documents
dataset_documents = {
doc.id: doc
-for doc in db.session.query(DatasetDocument)
-.where(DatasetDocument.id.in_(document_ids))
-.options(load_only(DatasetDocument.id, DatasetDocument.doc_form, DatasetDocument.dataset_id))
-.all()
+for doc in db.session.scalars(
+select(DatasetDocument)
+.where(DatasetDocument.id.in_(document_ids))
+.options(load_only(DatasetDocument.id, DatasetDocument.doc_form, DatasetDocument.dataset_id))
+).all()
}
valid_dataset_documents = {}

View File

@@ -426,11 +426,10 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory):
TIDB_ON_QDRANT_API_KEY = f"{tidb_auth_binding.account}:{tidb_auth_binding.password}"
else:
-idle_tidb_auth_binding = (
-db.session.query(TidbAuthBinding)
+idle_tidb_auth_binding = db.session.scalar(
+select(TidbAuthBinding)
.where(TidbAuthBinding.active == False, TidbAuthBinding.status == "ACTIVE")
.limit(1)
-.one_or_none()
)
if idle_tidb_auth_binding:
idle_tidb_auth_binding.active = True

View File

@@ -277,7 +277,7 @@ class Vector:
return self._vector_processor.search_by_vector(query_vector, **kwargs)
def search_by_file(self, file_id: str, **kwargs: Any) -> list[Document]:
-upload_file: UploadFile | None = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
+upload_file: UploadFile | None = db.session.get(UploadFile, file_id)
if not upload_file:
return []