dify/api/extensions/otel/parser/base.py
WH-2099 b4c0fb9463 refactor(workflow-file): phase2 replace remaining imports and remove core.file
Complete phase 2 of the file module migration by replacing the remaining repository-wide legacy imports and deleting the temporary core.file compatibility package introduced in phase 1.

What this commit changes
- Replace legacy core.file.* imports with core.workflow.file.* (see the example after this list) across:
  - controllers
  - core app/agent/datasource/prompt/rag/tools/variables
  - factories, fields, libs, models, services
  - otel parser integration points
  - unit and integration tests that referenced legacy paths
- Migrate residual runtime usages in app/task pipeline paths that still referenced core.file symbols.
- Update tests and model serialization helpers that relied on old module paths.
- Remove the compatibility bridge package entirely:
  - delete core/file/__init__.py
  - delete core/file/constants.py
  - delete core/file/enums.py
  - delete core/file/file_manager.py
  - delete core/file/helpers.py
  - delete core/file/models.py
  - delete core/file/tool_file_parser.py
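
A representative rewrite (illustrative call site; the core.workflow.file submodule names mirror the deleted bridge files):

    # before: phase 1 compatibility bridge
    from core.file.models import File
    from core.file.enums import FileType

    # after: phase 2 canonical namespace
    from core.workflow.file.models import File
    from core.workflow.file.enums import FileType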

Verification
- No Python references to core.file remain; a repository-wide search for the legacy module path comes back empty (a re-check sketch follows this list).
- Targeted regression tests for migrated file primitives and factory/type flows passed:
  - tests/unit_tests/core/test_file.py
  - tests/unit_tests/factories/test_variable_factory.py
  - tests/unit_tests/services/test_variable_truncator.py
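
A quick re-check (editor's sketch, not necessarily the commands used during the migration):

    import importlib.util

    # The phase 1 bridge package must be gone...
    assert importlib.util.find_spec("core.file") is None
    # ...and the canonical namespace must resolve.
    assert importlib.util.find_spec("core.workflow.file.models") is not None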

Result
- The repository now uses core.workflow.file as the single canonical file namespace.
- The migration is cleanly split into two commits: phase 1 (compatibility bridge) and phase 2 (full cutover).
2026-02-12 15:34:20 +08:00

"""
Base parser interface and utilities for OpenTelemetry node parsers.
"""
import json
from typing import Any, Protocol
from opentelemetry.trace import Span
from opentelemetry.trace.status import Status, StatusCode
from pydantic import BaseModel
from core.variables import Segment
from core.workflow.enums import NodeType
from core.workflow.file.models import File
from core.workflow.graph_events import GraphNodeEventBase
from core.workflow.nodes.base.node import Node
from extensions.otel.semconv.gen_ai import ChainAttributes, GenAIAttributes


def safe_json_dumps(obj: Any, ensure_ascii: bool = False) -> str:
    """
    Safely serialize objects to JSON, handling non-serializable types.

    Handles:
    - Segment types (ArrayFileSegment, FileSegment, etc.) - converts to their value
    - File objects - converts to dict using to_dict()
    - BaseModel objects - converts using model_dump()
    - Other types - falls back to str() representation

    Args:
        obj: Object to serialize
        ensure_ascii: Whether to ensure ASCII encoding

    Returns:
        JSON string representation of the object
    """

    def _convert_value(value: Any) -> Any:
        """Recursively convert non-serializable values."""
        if value is None:
            return None
        if isinstance(value, (bool, int, float, str)):
            return value
        if isinstance(value, Segment):
            # Convert Segment to its underlying value
            return _convert_value(value.value)
        if isinstance(value, File):
            # Convert File to dict
            return value.to_dict()
        if isinstance(value, BaseModel):
            # Convert Pydantic model to dict
            return _convert_value(value.model_dump(mode="json"))
        if isinstance(value, dict):
            return {k: _convert_value(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            return [_convert_value(item) for item in value]
        # Fallback to string representation for unknown types
        return str(value)

    try:
        converted = _convert_value(obj)
        return json.dumps(converted, ensure_ascii=ensure_ascii)
    except (TypeError, ValueError) as e:
        # If conversion still fails, return error message as string
        return json.dumps(
            {"error": f"Failed to serialize: {type(obj).__name__}", "message": str(e)}, ensure_ascii=ensure_ascii
        )
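
# Usage sketch (editor's example, not part of the original module): containers
# are walked recursively, so Segments, Files, and Pydantic models may nest
# freely, and unknown objects degrade to their str() form instead of raising:
#
#     safe_json_dumps({"files": [file_segment], "meta": pydantic_model})
#     safe_json_dumps(object())  # -> '"<object object at 0x...>"'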


class NodeOTelParser(Protocol):
    """Parser interface for node-specific OpenTelemetry enrichment."""

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None: ...


class DefaultNodeOTelParser:
    """Fallback parser used when no node-specific parser is registered."""

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        span.set_attribute("node.id", node.id)
        if node.execution_id:
            span.set_attribute("node.execution_id", node.execution_id)
        if hasattr(node, "node_type") and node.node_type:
            span.set_attribute("node.type", node.node_type.value)
        span.set_attribute(GenAIAttributes.FRAMEWORK, "dify")

        node_type = getattr(node, "node_type", None)
        if isinstance(node_type, NodeType):
            if node_type == NodeType.LLM:
                span.set_attribute(GenAIAttributes.SPAN_KIND, "LLM")
            elif node_type == NodeType.KNOWLEDGE_RETRIEVAL:
                span.set_attribute(GenAIAttributes.SPAN_KIND, "RETRIEVER")
            elif node_type == NodeType.TOOL:
                span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL")
            else:
                span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK")
        else:
            span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK")

        # Extract inputs and outputs from result_event
        if result_event and result_event.node_run_result:
            node_run_result = result_event.node_run_result
            if node_run_result.inputs:
                span.set_attribute(ChainAttributes.INPUT_VALUE, safe_json_dumps(node_run_result.inputs))
            if node_run_result.outputs:
                span.set_attribute(ChainAttributes.OUTPUT_VALUE, safe_json_dumps(node_run_result.outputs))

        if error:
            span.record_exception(error)
            span.set_status(Status(StatusCode.ERROR, str(error)))
        else:
            span.set_status(Status(StatusCode.OK))
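

# Example (editor's sketch, not part of the original module): any class with a
# structurally matching parse() satisfies the NodeOTelParser protocol; no
# inheritance is required. How parsers get registered per NodeType happens
# outside this file and is not shown here.
class ExampleToolNodeOTelParser:
    """Hypothetical node-specific parser that tags tool spans."""

    def parse(
        self, *, node: Node, span: "Span", error: Exception | None, result_event: GraphNodeEventBase | None = None
    ) -> None:
        span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL")
        if result_event and result_event.node_run_result and result_event.node_run_result.outputs:
            span.set_attribute(ChainAttributes.OUTPUT_VALUE, safe_json_dumps(result_event.node_run_result.outputs))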