From 603532863df0a99dd0b2114a786ee08f38ca9aa4 Mon Sep 17 00:00:00 2001 From: Lillian <11332799+Lillian68@users.noreply.github.com> Date: Sun, 24 May 2026 15:59:16 +0800 Subject: [PATCH] chore: add UUID/str type annotations to api endpoints for files in api/controllers/console/datasets (#36560) --- .../console/datasets/data_source.py | 11 +- api/controllers/console/datasets/datasets.py | 35 ++-- .../console/datasets/datasets_document.py | 193 +++++++++--------- .../console/datasets/datasets_segments.py | 182 +++++++++-------- api/controllers/console/datasets/external.py | 28 +-- .../console/datasets/hit_testing.py | 3 +- api/controllers/console/datasets/metadata.py | 13 +- .../rag_pipeline/rag_pipeline_import.py | 2 +- .../rag_pipeline/rag_pipeline_workflow.py | 17 +- 9 files changed, 248 insertions(+), 236 deletions(-) diff --git a/api/controllers/console/datasets/data_source.py b/api/controllers/console/datasets/data_source.py index f81adb0313..e3a6d20b2b 100644 --- a/api/controllers/console/datasets/data_source.py +++ b/api/controllers/console/datasets/data_source.py @@ -1,6 +1,7 @@ import json from collections.abc import Generator from typing import Any, Literal, cast +from uuid import UUID from flask import request from flask_restx import Resource, fields, marshal_with @@ -293,7 +294,7 @@ class DataSourceNotionApi(Resource): @login_required @account_initialization_required @console_ns.response(200, "Success", console_ns.models[TextContentResponse.__name__]) - def get(self, page_id, page_type): + def get(self, page_id: UUID, page_type: str): _, current_tenant_id = current_account_with_tenant() query = DataSourceNotionPreviewQuery.model_validate(request.args.to_dict()) @@ -306,11 +307,11 @@ class DataSourceNotionApi(Resource): plugin_id="langgenius/notion_datasource", ) - page_id = str(page_id) + page_id_str = str(page_id) extractor = NotionExtractor( notion_workspace_id="", - notion_obj_id=page_id, + notion_obj_id=page_id_str, notion_page_type=page_type, notion_access_token=credential.get("integration_secret"), tenant_id=current_tenant_id, @@ -367,7 +368,7 @@ class DataSourceNotionDatasetSyncApi(Resource): @login_required @account_initialization_required @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def get(self, dataset_id): + def get(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -385,7 +386,7 @@ class DataSourceNotionDocumentSyncApi(Resource): @login_required @account_initialization_required @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def get(self, dataset_id, document_id): + def get(self, dataset_id: UUID, document_id: UUID): dataset_id_str = str(dataset_id) document_id_str = str(document_id) dataset = DatasetService.get_dataset(dataset_id_str) diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 3cc1e6b028..8e453f96dd 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Any +from uuid import UUID from flask import request from flask_restx import Resource @@ -511,7 +512,7 @@ class DatasetApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): current_user, current_tenant_id = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -565,7 +566,7 @@ class DatasetApi(Resource): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") - def patch(self, dataset_id): + def patch(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -613,7 +614,7 @@ class DatasetApi(Resource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Dataset deleted successfully") - def delete(self, dataset_id): + def delete(self, dataset_id: UUID): dataset_id_str = str(dataset_id) current_user, _ = current_account_with_tenant() @@ -643,7 +644,7 @@ class DatasetUseCheckApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset_is_using = DatasetService.dataset_use_check(dataset_id_str) @@ -663,7 +664,7 @@ class DatasetQueryApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -803,7 +804,7 @@ class DatasetRelatedAppListApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -839,11 +840,11 @@ class DatasetIndexingStatusApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): _, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) documents = db.session.scalars( - select(Document).where(Document.dataset_id == dataset_id, Document.tenant_id == current_tenant_id) + select(Document).where(Document.dataset_id == dataset_id_str, Document.tenant_id == current_tenant_id) ).all() documents_status = [] for document in documents: @@ -951,15 +952,15 @@ class DatasetApiDeleteApi(Resource): @login_required @is_admin_or_owner_required @account_initialization_required - def delete(self, api_key_id): + def delete(self, api_key_id: UUID): _, current_tenant_id = current_account_with_tenant() - api_key_id = str(api_key_id) + api_key_id_str = str(api_key_id) key = db.session.scalar( select(ApiToken) .where( ApiToken.tenant_id == current_tenant_id, ApiToken.type == self.resource_type, - ApiToken.id == api_key_id, + ApiToken.id == api_key_id_str, ) .limit(1) ) @@ -984,7 +985,7 @@ class DatasetEnableApiApi(Resource): @login_required @account_initialization_required @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def post(self, dataset_id, status): + def post(self, dataset_id: UUID, status: str): dataset_id_str = str(dataset_id) DatasetService.update_dataset_api_status(dataset_id_str, status == "enable") @@ -1036,7 +1037,7 @@ class DatasetRetrievalSettingMockApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, vector_type): + def get(self, vector_type: str): return dump_response( RetrievalSettingResponse, _get_retrieval_methods_by_vector_type(vector_type, is_mock=True), @@ -1053,7 +1054,7 @@ class DatasetErrorDocs(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -1078,7 +1079,7 @@ class DatasetPermissionUserListApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -1108,7 +1109,7 @@ class DatasetAutoDisableLogApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index fabd61e6b0..d387834e9b 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -5,6 +5,7 @@ from collections.abc import Sequence from contextlib import ExitStack from datetime import datetime from typing import Any, Literal, cast +from uuid import UUID import sqlalchemy as sa from flask import request, send_file @@ -315,9 +316,9 @@ class DatasetDocumentListApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id): + def get(self, dataset_id: UUID): current_user, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) raw_args = request.args.to_dict() param = DocumentDatasetListParam.model_validate(raw_args) page = param.page @@ -342,7 +343,7 @@ class DatasetDocumentListApi(Resource): ) except (ArgumentTypeError, ValueError, Exception): fetch = False - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") @@ -351,7 +352,7 @@ class DatasetDocumentListApi(Resource): except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) - query = select(Document).where(Document.dataset_id == str(dataset_id), Document.tenant_id == current_tenant_id) + query = select(Document).where(Document.dataset_id == dataset_id_str, Document.tenant_id == current_tenant_id) if status: query = DocumentService.apply_display_status_filter(query, status) @@ -372,7 +373,7 @@ class DatasetDocumentListApi(Resource): sa.select( DocumentSegment.document_id, sa.func.sum(DocumentSegment.hit_count).label("total_hit_count") ) - .where(DocumentSegment.dataset_id == str(dataset_id)) + .where(DocumentSegment.dataset_id == dataset_id_str) .group_by(DocumentSegment.document_id) .subquery() ) @@ -444,11 +445,11 @@ class DatasetDocumentListApi(Resource): @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[KnowledgeConfig.__name__]) @console_ns.response(200, "Documents created successfully", console_ns.models[DatasetAndDocumentResponse.__name__]) - def post(self, dataset_id): + def post(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") @@ -472,7 +473,7 @@ class DatasetDocumentListApi(Resource): try: documents, batch = DocumentService.save_document_with_dataset_id(dataset, knowledge_config, current_user) - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) @@ -490,9 +491,9 @@ class DatasetDocumentListApi(Resource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Documents deleted successfully") - def delete(self, dataset_id): - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + def delete(self, dataset_id: UUID): + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: raise NotFound("Dataset not found.") # check user's model setting @@ -582,11 +583,11 @@ class DocumentIndexingEstimateApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): + def get(self, dataset_id: UUID, document_id: UUID): _, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) - document_id = str(document_id) - document = self.get_document(dataset_id, document_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + document = self.get_document(dataset_id_str, document_id_str) if document.indexing_status in {IndexingStatus.COMPLETED, IndexingStatus.ERROR}: raise DocumentAlreadyFinishedError() @@ -624,7 +625,7 @@ class DocumentIndexingEstimateApi(DocumentResource): data_process_rule_dict, document.doc_form, "English", - dataset_id, + dataset_id_str, ) return estimate_response.model_dump(), 200 except LLMBadRequestError: @@ -647,11 +648,10 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, batch): + def get(self, dataset_id: UUID, batch: str): _, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) - batch = str(batch) - documents = self.get_batch_documents(dataset_id, batch) + dataset_id_str = str(dataset_id) + documents = self.get_batch_documents(dataset_id_str, batch) if not documents: return {"tokens": 0, "total_price": 0, "currency": "USD", "total_segments": 0, "preview": []}, 200 data_process_rule = documents[0].dataset_process_rule @@ -725,7 +725,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource): data_process_rule_dict, document.doc_form, "English", - dataset_id, + dataset_id_str, ) return response.model_dump(), 200 except LLMBadRequestError: @@ -745,10 +745,9 @@ class DocumentBatchIndexingStatusApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, batch): - dataset_id = str(dataset_id) - batch = str(batch) - documents = self.get_batch_documents(dataset_id, batch) + def get(self, dataset_id: UUID, batch: str): + dataset_id_str = str(dataset_id) + documents = self.get_batch_documents(dataset_id_str, batch) documents_status = [] for document in documents: completed_segments = ( @@ -800,16 +799,16 @@ class DocumentIndexingStatusApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): - dataset_id = str(dataset_id) - document_id = str(document_id) - document = self.get_document(dataset_id, document_id) + def get(self, dataset_id: UUID, document_id: UUID): + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + document = self.get_document(dataset_id_str, document_id_str) completed_segments = ( db.session.scalar( select(func.count(DocumentSegment.id)).where( DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document_id), + DocumentSegment.document_id == str(document_id_str), DocumentSegment.status != SegmentStatus.RE_SEGMENT, ) ) @@ -818,7 +817,7 @@ class DocumentIndexingStatusApi(DocumentResource): total_segments = ( db.session.scalar( select(func.count(DocumentSegment.id)).where( - DocumentSegment.document_id == str(document_id), + DocumentSegment.document_id == str(document_id_str), DocumentSegment.status != SegmentStatus.RE_SEGMENT, ) ) @@ -861,10 +860,10 @@ class DocumentApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): - dataset_id = str(dataset_id) - document_id = str(document_id) - document = self.get_document(dataset_id, document_id) + def get(self, dataset_id: UUID, document_id: UUID): + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + document = self.get_document(dataset_id_str, document_id_str) metadata = request.args.get("metadata", "all") if metadata not in self.METADATA_CHOICES: @@ -873,7 +872,7 @@ class DocumentApi(DocumentResource): if metadata == "only": response = {"id": document.id, "doc_type": document.doc_type, "doc_metadata": document.doc_metadata_details} elif metadata == "without": - dataset_process_rules = DatasetService.get_process_rules(dataset_id) + dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} response = { "id": document.id, @@ -907,7 +906,7 @@ class DocumentApi(DocumentResource): "need_summary": document.need_summary if document.need_summary is not None else False, } else: - dataset_process_rules = DatasetService.get_process_rules(dataset_id) + dataset_process_rules = DatasetService.get_process_rules(dataset_id_str) document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} response = { "id": document.id, @@ -950,16 +949,16 @@ class DocumentApi(DocumentResource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Document deleted successfully") - def delete(self, dataset_id, document_id): - dataset_id = str(dataset_id) - document_id = str(document_id) - dataset = DatasetService.get_dataset(dataset_id) + def delete(self, dataset_id: UUID, document_id: UUID): + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) - document = self.get_document(dataset_id, document_id) + document = self.get_document(dataset_id_str, document_id_str) try: DocumentService.delete_document(document) @@ -1003,10 +1002,10 @@ class DocumentBatchDownloadZipApi(DocumentResource): payload = DocumentBatchDownloadZipPayload.model_validate(console_ns.payload or {}) current_user, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) document_ids: list[str] = [str(document_id) for document_id in payload.document_ids] upload_files, download_name = DocumentService.prepare_document_batch_download_zip( - dataset_id=dataset_id, + dataset_id=dataset_id_str, document_ids=document_ids, tenant_id=current_tenant_id, current_user=current_user, @@ -1044,11 +1043,11 @@ class DocumentProcessingApi(DocumentResource): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") - def patch(self, dataset_id, document_id, action: Literal["pause", "resume"]): + def patch(self, dataset_id: UUID, document_id: UUID, action: Literal["pause", "resume"]): current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) - document_id = str(document_id) - document = self.get_document(dataset_id, document_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + document = self.get_document(dataset_id_str, document_id_str) # The role of the current user in the ta table must be admin, owner, dataset_operator, or editor if not current_user.is_dataset_editor: @@ -1092,11 +1091,11 @@ class DocumentMetadataApi(DocumentResource): @setup_required @login_required @account_initialization_required - def put(self, dataset_id, document_id): + def put(self, dataset_id: UUID, document_id: UUID): current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) - document_id = str(document_id) - document = self.get_document(dataset_id, document_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + document = self.get_document(dataset_id_str, document_id_str) req_data = DocumentMetadataUpdatePayload.model_validate(request.get_json() or {}) @@ -1141,10 +1140,10 @@ class DocumentStatusApi(DocumentResource): @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def patch(self, dataset_id, action: Literal["enable", "disable", "archive", "un_archive"]): + def patch(self, dataset_id: UUID, action: Literal["enable", "disable", "archive", "un_archive"]): current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: raise NotFound("Dataset not found.") @@ -1179,16 +1178,16 @@ class DocumentPauseApi(DocumentResource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Document paused successfully") - def patch(self, dataset_id, document_id): + def patch(self, dataset_id: UUID, document_id: UUID): """pause document.""" - dataset_id = str(dataset_id) - document_id = str(document_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") - document = DocumentService.get_document(dataset.id, document_id) + document = DocumentService.get_document(dataset.id, document_id_str) # 404 if document not found if document is None: @@ -1214,14 +1213,14 @@ class DocumentRecoverApi(DocumentResource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Document resumed successfully") - def patch(self, dataset_id, document_id): + def patch(self, dataset_id: UUID, document_id: UUID): """recover document.""" - dataset_id = str(dataset_id) - document_id = str(document_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") - document = DocumentService.get_document(dataset.id, document_id) + document = DocumentService.get_document(dataset.id, document_id_str) # 404 if document not found if document is None: @@ -1247,11 +1246,11 @@ class DocumentRetryApi(DocumentResource): @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[DocumentRetryPayload.__name__]) @console_ns.response(204, "Documents retry started successfully") - def post(self, dataset_id): + def post(self, dataset_id: UUID): """retry document.""" payload = DocumentRetryPayload.model_validate(console_ns.payload or {}) - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) retry_documents = [] if not dataset: raise NotFound("Dataset not found.") @@ -1277,7 +1276,7 @@ class DocumentRetryApi(DocumentResource): logger.exception("Failed to retry document, document id: %s", document_id) continue # retry document - DocumentService.retry_document(dataset_id, retry_documents) + DocumentService.retry_document(dataset_id_str, retry_documents) return "", 204 @@ -1289,7 +1288,7 @@ class DocumentRenameApi(DocumentResource): @account_initialization_required @console_ns.response(200, "Document renamed successfully", console_ns.models[DocumentResponse.__name__]) @console_ns.expect(console_ns.models[DocumentRenamePayload.__name__]) - def post(self, dataset_id, document_id): + def post(self, dataset_id: UUID, document_id: UUID): # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator current_user, _ = current_account_with_tenant() if not current_user.is_dataset_editor: @@ -1301,7 +1300,7 @@ class DocumentRenameApi(DocumentResource): payload = DocumentRenamePayload.model_validate(console_ns.payload or {}) try: - document = DocumentService.rename_document(dataset_id, document_id, payload.name) + document = DocumentService.rename_document(str(dataset_id), str(document_id), payload.name) except services.errors.document.DocumentIndexingError: raise DocumentIndexingError("Cannot delete document during indexing.") @@ -1314,15 +1313,15 @@ class WebsiteDocumentSyncApi(DocumentResource): @login_required @account_initialization_required @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def get(self, dataset_id, document_id): + def get(self, dataset_id: UUID, document_id: UUID): """sync website document.""" _, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") - document_id = str(document_id) - document = DocumentService.get_document(dataset.id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset.id, document_id_str) if not document: raise NotFound("Document not found.") if document.tenant_id != current_tenant_id: @@ -1333,7 +1332,7 @@ class WebsiteDocumentSyncApi(DocumentResource): if DocumentService.check_archived(document): raise ArchivedDocumentImmutableError() # sync document - DocumentService.sync_website_document(dataset_id, document) + DocumentService.sync_website_document(dataset_id_str, document) return {"result": "success"}, 200 @@ -1343,19 +1342,19 @@ class DocumentPipelineExecutionLogApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): - dataset_id = str(dataset_id) - document_id = str(document_id) + def get(self, dataset_id: UUID, document_id: UUID): + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") - document = DocumentService.get_document(dataset.id, document_id) + document = DocumentService.get_document(dataset.id, document_id_str) if not document: raise NotFound("Document not found.") log = db.session.scalar( select(DocumentPipelineExecutionLog) - .where(DocumentPipelineExecutionLog.document_id == document_id) + .where(DocumentPipelineExecutionLog.document_id == document_id_str) .order_by(DocumentPipelineExecutionLog.created_at.desc()) .limit(1) ) @@ -1392,7 +1391,7 @@ class DocumentGenerateSummaryApi(Resource): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") - def post(self, dataset_id): + def post(self, dataset_id: UUID): """ Generate summary index for specified documents. @@ -1401,10 +1400,10 @@ class DocumentGenerateSummaryApi(Resource): then asynchronously generates summary indexes for the provided documents. """ current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) # Get dataset - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") @@ -1438,7 +1437,7 @@ class DocumentGenerateSummaryApi(Resource): raise ValueError("Summary index is not enabled for this dataset. Please enable it in the dataset settings.") # Verify all documents exist and belong to the dataset - documents = DocumentService.get_documents_by_ids(dataset_id, document_list) + documents = DocumentService.get_documents_by_ids(dataset_id_str, document_list) if len(documents) != len(document_list): found_ids = {doc.id for doc in documents} @@ -1452,7 +1451,7 @@ class DocumentGenerateSummaryApi(Resource): if documents_to_update: document_ids_to_update = [str(doc.id) for doc in documents_to_update] DocumentService.update_documents_need_summary( - dataset_id=dataset_id, + dataset_id=dataset_id_str, document_ids=document_ids_to_update, need_summary=True, ) @@ -1465,11 +1464,11 @@ class DocumentGenerateSummaryApi(Resource): continue # Dispatch async task - generate_summary_index_task.delay(dataset_id, document.id) + generate_summary_index_task.delay(dataset_id_str, document.id) logger.info( "Dispatched summary generation task for document %s in dataset %s", document.id, - dataset_id, + dataset_id_str, ) return {"result": "success"}, 200 @@ -1485,7 +1484,7 @@ class DocumentSummaryStatusApi(DocumentResource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): + def get(self, dataset_id: UUID, document_id: UUID): """ Get summary index generation status for a document. @@ -1499,11 +1498,11 @@ class DocumentSummaryStatusApi(DocumentResource): - summaries: List of summary records with status and content preview """ current_user, _ = current_account_with_tenant() - dataset_id = str(dataset_id) - document_id = str(document_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) # Get dataset - dataset = DatasetService.get_dataset(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") @@ -1517,8 +1516,8 @@ class DocumentSummaryStatusApi(DocumentResource): from services.summary_index_service import SummaryIndexService result = SummaryIndexService.get_document_summary_status_detail( - document_id=document_id, - dataset_id=dataset_id, + document_id=document_id_str, + dataset_id=dataset_id_str, ) return result, 200 diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 1d3bc96c1b..38ad7dfdd1 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -1,4 +1,6 @@ import uuid +from typing import Literal +from uuid import UUID from flask import request from flask_restx import Resource, marshal @@ -113,12 +115,12 @@ class DatasetDocumentSegmentListApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id): + def get(self, dataset_id: UUID, document_id: UUID): current_user, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) - document_id = str(document_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + document_id_str = str(document_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") @@ -127,7 +129,7 @@ class DatasetDocumentSegmentListApi(Resource): except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) - document = DocumentService.get_document(dataset_id, document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") @@ -148,7 +150,7 @@ class DatasetDocumentSegmentListApi(Resource): query = ( select(DocumentSegment) .where( - DocumentSegment.document_id == str(document_id), + DocumentSegment.document_id == document_id_str, DocumentSegment.tenant_id == current_tenant_id, ) .order_by(DocumentSegment.position.asc()) @@ -201,7 +203,9 @@ class DatasetDocumentSegmentListApi(Resource): if segment_ids: from services.summary_index_service import SummaryIndexService - summary_records = SummaryIndexService.get_segments_summaries(segment_ids=segment_ids, dataset_id=dataset_id) + summary_records = SummaryIndexService.get_segments_summaries( + segment_ids=segment_ids, dataset_id=dataset_id_str + ) # Only include enabled summaries (already filtered by service) summaries = {chunk_id: summary.summary_content for chunk_id, summary in summary_records.items()} @@ -226,19 +230,19 @@ class DatasetDocumentSegmentListApi(Resource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Segments deleted successfully") - def delete(self, dataset_id, document_id): + def delete(self, dataset_id: UUID, document_id: UUID): current_user, _ = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") segment_ids = request.args.getlist("segment_id") @@ -262,15 +266,15 @@ class DatasetDocumentSegmentApi(Resource): @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(200, "Success", console_ns.models[SimpleResultResponse.__name__]) - def patch(self, dataset_id, document_id, action): + def patch(self, dataset_id: UUID, document_id: UUID, action: Literal["enable", "disable"]): current_user, current_tenant_id = current_account_with_tenant() - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check user's model setting @@ -321,17 +325,17 @@ class DatasetDocumentSegmentAddApi(Resource): @cloud_edition_billing_knowledge_limit_check("add_segment") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[SegmentCreatePayload.__name__]) - def post(self, dataset_id, document_id): + def post(self, dataset_id: UUID, document_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") if not current_user.is_dataset_editor: @@ -361,7 +365,7 @@ class DatasetDocumentSegmentAddApi(Resource): payload_dict = payload.model_dump(exclude_none=True) SegmentService.segment_create_args_validate(payload_dict, document) segment = SegmentService.create_segment(payload_dict, document, dataset) - return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200 + return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200 @console_ns.route("/datasets//documents//segments/") @@ -372,19 +376,19 @@ class DatasetDocumentSegmentUpdateApi(Resource): @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[SegmentUpdatePayload.__name__]) - def patch(self, dataset_id, document_id, segment_id): + def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") if dataset.indexing_technique == IndexTechniqueType.HIGH_QUALITY: @@ -404,10 +408,10 @@ class DatasetDocumentSegmentUpdateApi(Resource): except ProviderTokenNotInitError as ex: raise ProviderNotInitializeError(ex.description) # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: @@ -428,33 +432,33 @@ class DatasetDocumentSegmentUpdateApi(Resource): segment = SegmentService.update_segment( SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset ) - return {"data": _get_segment_with_summary(segment, dataset_id), "doc_form": document.doc_form}, 200 + return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200 @setup_required @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Segment deleted successfully") - def delete(self, dataset_id, document_id, segment_id): + def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: @@ -483,17 +487,17 @@ class DatasetDocumentSegmentBatchImportApi(Resource): @cloud_edition_billing_knowledge_limit_check("add_segment") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[BatchImportPayload.__name__]) - def post(self, dataset_id, document_id): + def post(self, dataset_id: UUID, document_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") @@ -517,8 +521,8 @@ class DatasetDocumentSegmentBatchImportApi(Resource): batch_create_segment_to_index_task.delay( str(job_id), upload_file_id, - dataset_id, - document_id, + dataset_id_str, + document_id_str, current_tenant_id, current_user.id, ) @@ -530,7 +534,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, job_id=None, dataset_id=None, document_id=None): + def get(self, job_id=None, dataset_id: UUID | None = None, document_id: UUID | None = None): if job_id is None: raise NotFound("The job does not exist.") job_id = str(job_id) @@ -551,24 +555,24 @@ class ChildChunkAddApi(Resource): @cloud_edition_billing_knowledge_limit_check("add_segment") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[ChildChunkCreatePayload.__name__]) - def post(self, dataset_id, document_id, segment_id): + def post(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: @@ -606,26 +610,26 @@ class ChildChunkAddApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, dataset_id, document_id, segment_id): + def get(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): _, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: @@ -642,7 +646,9 @@ class ChildChunkAddApi(Resource): limit = min(args.limit, 100) keyword = args.keyword - child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword) + child_chunks = SegmentService.get_child_chunks( + segment_id_str, document_id_str, dataset_id_str, page, limit, keyword + ) return { "data": marshal(child_chunks.items, child_chunk_fields), "total": child_chunks.total, @@ -656,26 +662,26 @@ class ChildChunkAddApi(Resource): @account_initialization_required @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") - def patch(self, dataset_id, document_id, segment_id): + def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: @@ -705,39 +711,39 @@ class ChildChunkUpdateApi(Resource): @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.response(204, "Child chunk deleted successfully") - def delete(self, dataset_id, document_id, segment_id, child_chunk_id): + def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: raise NotFound("Segment not found.") # check child chunk - child_chunk_id = str(child_chunk_id) + child_chunk_id_str = str(child_chunk_id) child_chunk = db.session.scalar( select(ChildChunk) .where( - ChildChunk.id == str(child_chunk_id), + ChildChunk.id == str(child_chunk_id_str), ChildChunk.tenant_id == current_tenant_id, ChildChunk.segment_id == segment.id, - ChildChunk.document_id == document_id, + ChildChunk.document_id == document_id_str, ) .limit(1) ) @@ -762,39 +768,39 @@ class ChildChunkUpdateApi(Resource): @cloud_edition_billing_resource_check("vector_space") @cloud_edition_billing_rate_limit_check("knowledge") @console_ns.expect(console_ns.models[ChildChunkUpdatePayload.__name__]) - def patch(self, dataset_id, document_id, segment_id, child_chunk_id): + def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID): current_user, current_tenant_id = current_account_with_tenant() # check dataset - dataset_id = str(dataset_id) - dataset = DatasetService.get_dataset(dataset_id) + dataset_id_str = str(dataset_id) + dataset = DatasetService.get_dataset(dataset_id_str) if not dataset: raise NotFound("Dataset not found.") # check user's model setting DatasetService.check_dataset_model_setting(dataset) # check document - document_id = str(document_id) - document = DocumentService.get_document(dataset_id, document_id) + document_id_str = str(document_id) + document = DocumentService.get_document(dataset_id_str, document_id_str) if not document: raise NotFound("Document not found.") # check segment - segment_id = str(segment_id) + segment_id_str = str(segment_id) segment = db.session.scalar( select(DocumentSegment) - .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id) + .where(DocumentSegment.id == segment_id_str, DocumentSegment.tenant_id == current_tenant_id) .limit(1) ) if not segment: raise NotFound("Segment not found.") # check child chunk - child_chunk_id = str(child_chunk_id) + child_chunk_id_str = str(child_chunk_id) child_chunk = db.session.scalar( select(ChildChunk) .where( - ChildChunk.id == str(child_chunk_id), + ChildChunk.id == str(child_chunk_id_str), ChildChunk.tenant_id == current_tenant_id, ChildChunk.segment_id == segment.id, - ChildChunk.document_id == document_id, + ChildChunk.document_id == document_id_str, ) .limit(1) ) diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index d1cdc15d0b..d6cc176a39 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -1,3 +1,5 @@ +from uuid import UUID + from flask import request from flask_restx import Resource, fields, marshal from pydantic import BaseModel, Field @@ -175,11 +177,11 @@ class ExternalApiTemplateApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, external_knowledge_api_id): + def get(self, external_knowledge_api_id: UUID): _, current_tenant_id = current_account_with_tenant() - external_knowledge_api_id = str(external_knowledge_api_id) + external_knowledge_api_id_str = str(external_knowledge_api_id) external_knowledge_api = ExternalDatasetService.get_external_knowledge_api( - external_knowledge_api_id, current_tenant_id + external_knowledge_api_id_str, current_tenant_id ) if external_knowledge_api is None: raise NotFound("API template not found.") @@ -190,9 +192,9 @@ class ExternalApiTemplateApi(Resource): @login_required @account_initialization_required @console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__]) - def patch(self, external_knowledge_api_id): + def patch(self, external_knowledge_api_id: UUID): current_user, current_tenant_id = current_account_with_tenant() - external_knowledge_api_id = str(external_knowledge_api_id) + external_knowledge_api_id_str = str(external_knowledge_api_id) payload = ExternalKnowledgeApiPayload.model_validate(console_ns.payload or {}) ExternalDatasetService.validate_api_list(payload.settings) @@ -200,7 +202,7 @@ class ExternalApiTemplateApi(Resource): external_knowledge_api = ExternalDatasetService.update_external_knowledge_api( tenant_id=current_tenant_id, user_id=current_user.id, - external_knowledge_api_id=external_knowledge_api_id, + external_knowledge_api_id=external_knowledge_api_id_str, args=payload.model_dump(), ) @@ -210,14 +212,14 @@ class ExternalApiTemplateApi(Resource): @login_required @account_initialization_required @console_ns.response(204, "External knowledge API deleted successfully") - def delete(self, external_knowledge_api_id): + def delete(self, external_knowledge_api_id: UUID): current_user, current_tenant_id = current_account_with_tenant() - external_knowledge_api_id = str(external_knowledge_api_id) + external_knowledge_api_id_str = str(external_knowledge_api_id) if not (current_user.has_edit_permission or current_user.is_dataset_operator): raise Forbidden() - ExternalDatasetService.delete_external_knowledge_api(current_tenant_id, external_knowledge_api_id) + ExternalDatasetService.delete_external_knowledge_api(current_tenant_id, external_knowledge_api_id_str) return "", 204 @@ -230,12 +232,12 @@ class ExternalApiUseCheckApi(Resource): @setup_required @login_required @account_initialization_required - def get(self, external_knowledge_api_id): + def get(self, external_knowledge_api_id: UUID): _, current_tenant_id = current_account_with_tenant() - external_knowledge_api_id = str(external_knowledge_api_id) + external_knowledge_api_id_str = str(external_knowledge_api_id) external_knowledge_api_is_using, count = ExternalDatasetService.external_knowledge_api_use_check( - external_knowledge_api_id, current_tenant_id + external_knowledge_api_id_str, current_tenant_id ) return {"is_using": external_knowledge_api_is_using, "count": count}, 200 @@ -286,7 +288,7 @@ class ExternalKnowledgeHitTestingApi(Resource): @setup_required @login_required @account_initialization_required - def post(self, dataset_id): + def post(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) diff --git a/api/controllers/console/datasets/hit_testing.py b/api/controllers/console/datasets/hit_testing.py index 8758f983ee..110a2e16f5 100644 --- a/api/controllers/console/datasets/hit_testing.py +++ b/api/controllers/console/datasets/hit_testing.py @@ -2,6 +2,7 @@ from __future__ import annotations from datetime import datetime from typing import Any +from uuid import UUID from flask_restx import Resource from pydantic import Field, field_validator @@ -118,7 +119,7 @@ class HitTestingApi(Resource, DatasetsHitTestingBase): @login_required @account_initialization_required @cloud_edition_billing_rate_limit_check("knowledge") - def post(self, dataset_id): + def post(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = self.get_and_validate_dataset(dataset_id_str) diff --git a/api/controllers/console/datasets/metadata.py b/api/controllers/console/datasets/metadata.py index 4de5f32fb8..cf516aa63b 100644 --- a/api/controllers/console/datasets/metadata.py +++ b/api/controllers/console/datasets/metadata.py @@ -1,4 +1,5 @@ from typing import Literal +from uuid import UUID from flask_restx import Resource from werkzeug.exceptions import NotFound @@ -42,7 +43,7 @@ class DatasetMetadataCreateApi(Resource): @enterprise_license_required @console_ns.response(201, "Metadata created successfully", console_ns.models[DatasetMetadataResponse.__name__]) @console_ns.expect(console_ns.models[MetadataArgs.__name__]) - def post(self, dataset_id): + def post(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() metadata_args = MetadataArgs.model_validate(console_ns.payload or {}) @@ -62,7 +63,7 @@ class DatasetMetadataCreateApi(Resource): @console_ns.response( 200, "Metadata retrieved successfully", console_ns.models[DatasetMetadataListResponse.__name__] ) - def get(self, dataset_id): + def get(self, dataset_id: UUID): dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) if dataset is None: @@ -79,7 +80,7 @@ class DatasetMetadataApi(Resource): @enterprise_license_required @console_ns.response(200, "Metadata updated successfully", console_ns.models[DatasetMetadataResponse.__name__]) @console_ns.expect(console_ns.models[MetadataUpdatePayload.__name__]) - def patch(self, dataset_id, metadata_id): + def patch(self, dataset_id: UUID, metadata_id: UUID): current_user, _ = current_account_with_tenant() payload = MetadataUpdatePayload.model_validate(console_ns.payload or {}) name = payload.name @@ -99,7 +100,7 @@ class DatasetMetadataApi(Resource): @account_initialization_required @enterprise_license_required @console_ns.response(204, "Metadata deleted successfully") - def delete(self, dataset_id, metadata_id): + def delete(self, dataset_id: UUID, metadata_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) metadata_id_str = str(metadata_id) @@ -136,7 +137,7 @@ class DatasetMetadataBuiltInFieldActionApi(Resource): @account_initialization_required @enterprise_license_required @console_ns.response(204, "Action completed successfully") - def post(self, dataset_id, action: Literal["enable", "disable"]): + def post(self, dataset_id: UUID, action: Literal["enable", "disable"]): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) @@ -164,7 +165,7 @@ class DocumentMetadataEditApi(Resource): 204, "Documents metadata updated successfully", ) - def post(self, dataset_id): + def post(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() dataset_id_str = str(dataset_id) dataset = DatasetService.get_dataset(dataset_id_str) diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py index cf92218508..3ae5d308c2 100644 --- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py +++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py @@ -105,7 +105,7 @@ class RagPipelineImportConfirmApi(Resource): @account_initialization_required @edit_permission_required @marshal_with(pipeline_import_model) - def post(self, import_id): + def post(self, import_id: str): current_user, _ = current_account_with_tenant() with Session(db.engine, expire_on_commit=False) as session: diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py index 25e8b060b8..a7727513df 100644 --- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py +++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py @@ -1,6 +1,7 @@ import json import logging from typing import Any, Literal, cast +from uuid import UUID from flask import abort, request from flask_restx import Resource @@ -875,14 +876,14 @@ class RagPipelineWorkflowRunDetailApi(Resource): @login_required @account_initialization_required @get_rag_pipeline - def get(self, pipeline: Pipeline, run_id): + def get(self, pipeline: Pipeline, run_id: UUID): """ Get workflow run detail """ - run_id = str(run_id) + run_id_str = str(run_id) rag_pipeline_service = RagPipelineService() - workflow_run = rag_pipeline_service.get_rag_pipeline_workflow_run(pipeline=pipeline, run_id=run_id) + workflow_run = rag_pipeline_service.get_rag_pipeline_workflow_run(pipeline=pipeline, run_id=run_id_str) if workflow_run is None: raise NotFound("Workflow run not found") @@ -904,13 +905,13 @@ class RagPipelineWorkflowRunNodeExecutionListApi(Resource): """ Get workflow run node execution list """ - run_id = str(run_id) + run_id_str = str(run_id) rag_pipeline_service = RagPipelineService() user = cast("Account | EndUser", current_user) node_executions = rag_pipeline_service.get_rag_pipeline_workflow_run_node_executions( pipeline=pipeline, - run_id=run_id, + run_id=run_id_str, user=user, ) @@ -960,15 +961,15 @@ class RagPipelineTransformApi(Resource): @setup_required @login_required @account_initialization_required - def post(self, dataset_id: str): + def post(self, dataset_id: UUID): current_user, _ = current_account_with_tenant() if not (current_user.has_edit_permission or current_user.is_dataset_operator): raise Forbidden() - dataset_id = str(dataset_id) + dataset_id_str = str(dataset_id) rag_pipeline_transform_service = RagPipelineTransformService() - result = rag_pipeline_transform_service.transform_dataset(dataset_id) + result = rag_pipeline_transform_service.transform_dataset(dataset_id_str) return result