Files
dify/api/core/plugin/utils/chunk_merger.py
Stream e0082dbf18 revert: add tools for output in agent mode
feat: hide output tools and improve JSON formatting for structured output
feat: hide output tools and improve JSON formatting for structured output
fix: handle prompt template correctly to extract selectors for step run
fix: emit StreamChunkEvent correctly for sandbox agent
chore: better debug message
fix: incorrect output tool runtime selection
fix: type issues
fix: align parameter list
fix: align parameter list
fix: hide internal builtin providers from tool list
vibe: implement file structured output
vibe: implement file structured output
fix: refix parameter for tool
fix: crash
fix: crash
refactor: remove union types
fix: type check
Merge branch 'feat/structured-output-with-sandbox' into feat/support-agent-sandbox
fix: provide json as text
fix: provide json as text
fix: get AgentResult correctly
fix: provides correct prompts, tools and terminal predicates
fix: provides correct prompts, tools and terminal predicates
fix: circular import
feat: support structured output in sandbox and tool mode
2026-02-04 21:43:53 +08:00

96 lines
3.7 KiB
Python

from collections.abc import Generator
from dataclasses import dataclass, field
from typing import TypeVar, Union
from core.agent.entities import AgentInvokeMessage
from core.tools.entities.tool_entities import ToolInvokeMessage
# Either a tool-invoke or an agent-invoke message. Bound as a TypeVar (rather
# than a plain Union) so merge_blob_chunks preserves the concrete message type
# of the stream it is given: a ToolInvokeMessage stream yields ToolInvokeMessage.
MessageType = TypeVar("MessageType", bound=Union[ToolInvokeMessage, AgentInvokeMessage])
@dataclass
class FileChunk:
    """Accumulates the bytes of one file arriving as streamed chunks.

    A buffer of ``total_length`` bytes is allocated up front; incoming chunks
    are copied in sequentially and ``bytes_written`` tracks the fill level.
    """

    # Declared size of the complete file, in bytes (the only init argument).
    total_length: int
    # Number of bytes copied into ``data`` so far; starts at zero.
    bytes_written: int = field(default=0, init=False)
    # Pre-sized destination buffer, created in __post_init__.
    data: bytearray = field(init=False)

    def __post_init__(self) -> None:
        # Allocate the whole zero-filled buffer once, sized to the declared length.
        self.data = bytearray(self.total_length)
def merge_blob_chunks(
    response: Generator[MessageType, None, None],
    max_file_size: int = 30 * 1024 * 1024,
    max_chunk_size: int = 8192,
) -> Generator[MessageType, None, None]:
    """
    Merge streaming blob chunks into complete blob messages.

    This function processes a stream of plugin invoke messages, accumulating
    BLOB_CHUNK messages by their ID until the final chunk is received,
    then yielding a single complete BLOB message. All other message types
    are passed through unchanged.

    Args:
        response: Generator yielding messages that may include blob chunks
        max_file_size: Maximum allowed file size in bytes (default: 30MB)
        max_chunk_size: Maximum allowed chunk size in bytes (default: 8KB)

    Yields:
        Messages from the response stream, with blob chunks merged into complete blobs

    Raises:
        ValueError: If file size exceeds max_file_size or chunk size exceeds max_chunk_size
    """
    # Per-file buffers, keyed by the chunk stream's id.
    files: dict[str, FileChunk] = {}
    for resp in response:
        if resp.type != ToolInvokeMessage.MessageType.BLOB_CHUNK:
            # Not a blob chunk: forward untouched.
            yield resp
            continue

        assert isinstance(resp.message, ToolInvokeMessage.BlobChunkMessage)
        chunk_id = resp.message.id
        total_length = resp.message.total_length
        blob_data = resp.message.blob
        is_end = resp.message.end

        # Validate the DECLARED size before allocating anything. FileChunk
        # pre-allocates a bytearray of total_length bytes in __post_init__,
        # so checking only after allocation would let a hostile or buggy
        # plugin force a huge allocation by declaring a bogus total_length.
        if total_length > max_file_size:
            raise ValueError(f"File is too large which reached the limit of {max_file_size / 1024 / 1024}MB")

        # Reject oversized chunks before touching buffer state, and drop any
        # partial buffer for this id (consistent with the file-size path).
        if len(blob_data) > max_chunk_size:
            files.pop(chunk_id, None)
            raise ValueError(f"File chunk is too large which reached the limit of {max_chunk_size / 1024}KB")

        # Lazily create the buffer on the first chunk for this id.
        if chunk_id not in files:
            files[chunk_id] = FileChunk(total_length)
        buffer = files[chunk_id]

        # Guard the ACCUMULATED size too: chunks may sum past the declared
        # total_length (bytearray slice assignment would silently grow).
        if buffer.bytes_written + len(blob_data) > max_file_size:
            # Discard the partial file before raising.
            del files[chunk_id]
            raise ValueError(f"File is too large which reached the limit of {max_file_size / 1024 / 1024}MB")

        # Copy the chunk into the pre-sized buffer at the current offset.
        buffer.data[buffer.bytes_written : buffer.bytes_written + len(blob_data)] = blob_data
        buffer.bytes_written += len(blob_data)

        if is_end:
            # Re-wrap using the concrete runtime class of the incoming message
            # (ToolInvokeMessage or AgentInvokeMessage) so the stream's
            # message type is preserved for the caller.
            message_class = type(resp)
            merged_message = message_class(
                type=ToolInvokeMessage.MessageType.BLOB,
                message=ToolInvokeMessage.BlobMessage(
                    blob=bytes(buffer.data[: buffer.bytes_written])
                ),
                meta=resp.meta,
            )
            assert isinstance(merged_message, (ToolInvokeMessage, AgentInvokeMessage))
            yield merged_message  # type: ignore
            # Free the buffer for this completed file.
            del files[chunk_id]