feat: extract model runtime

Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
-LAN-
2026-03-15 15:34:47 +08:00
parent 3d5a29462e
commit fbb74a4af9
178 changed files with 4343 additions and 2134 deletions

View File

@@ -65,7 +65,7 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
for thread in threads:
thread.join()
# do rerank for searched documents
-model_manager = ModelManager()
+model_manager = ModelManager.for_tenant(tenant_id=self.tenant_id)
rerank_model_instance = model_manager.get_model_instance(
tenant_id=self.tenant_id,
provider=self.reranking_provider_name,

View File

@@ -37,7 +37,7 @@ class ModelInvocationUtils:
"""
get max llm context tokens of the model
"""
-model_manager = ModelManager()
+model_manager = ModelManager.for_tenant(tenant_id=tenant_id)
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id,
model_type=ModelType.LLM,
@@ -65,7 +65,7 @@ class ModelInvocationUtils:
"""
# get model instance
-model_manager = ModelManager()
+model_manager = ModelManager.for_tenant(tenant_id=tenant_id)
model_instance = model_manager.get_default_model_instance(tenant_id=tenant_id, model_type=ModelType.LLM)
if not model_instance:
@@ -92,7 +92,7 @@ class ModelInvocationUtils:
"""
# get model manager
-model_manager = ModelManager()
+model_manager = ModelManager.for_tenant(tenant_id=tenant_id)
# get model instance
model_instance = model_manager.get_default_model_instance(
tenant_id=tenant_id,
@@ -136,7 +136,6 @@ class ModelInvocationUtils:
tools=[],
stop=[],
stream=False,
-user=user_id,
callbacks=[],
)
except InvokeRateLimitError as e: