diff --git a/.gitignore b/.gitignore index 79ba44b207..5ad728c3da 100644 --- a/.gitignore +++ b/.gitignore @@ -189,6 +189,7 @@ docker/volumes/matrixone/* docker/volumes/mysql/* docker/volumes/seekdb/* !docker/volumes/oceanbase/init.d +docker/volumes/iris/* docker/nginx/conf.d/default.conf docker/nginx/ssl/* diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index a5e35c99ca..c4390ffaab 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -26,6 +26,7 @@ from .vdb.clickzetta_config import ClickzettaConfig from .vdb.couchbase_config import CouchbaseConfig from .vdb.elasticsearch_config import ElasticsearchConfig from .vdb.huawei_cloud_config import HuaweiCloudConfig +from .vdb.iris_config import IrisVectorConfig from .vdb.lindorm_config import LindormConfig from .vdb.matrixone_config import MatrixoneConfig from .vdb.milvus_config import MilvusConfig @@ -336,6 +337,7 @@ class MiddlewareConfig( ChromaConfig, ClickzettaConfig, HuaweiCloudConfig, + IrisVectorConfig, MilvusConfig, AlibabaCloudMySQLConfig, MyScaleConfig, diff --git a/api/configs/middleware/vdb/iris_config.py b/api/configs/middleware/vdb/iris_config.py new file mode 100644 index 0000000000..c532d191c3 --- /dev/null +++ b/api/configs/middleware/vdb/iris_config.py @@ -0,0 +1,91 @@ +"""Configuration for InterSystems IRIS vector database.""" + +from pydantic import Field, PositiveInt, model_validator +from pydantic_settings import BaseSettings + + +class IrisVectorConfig(BaseSettings): + """Configuration settings for IRIS vector database connection and pooling.""" + + IRIS_HOST: str | None = Field( + description="Hostname or IP address of the IRIS server.", + default="localhost", + ) + + IRIS_SUPER_SERVER_PORT: PositiveInt | None = Field( + description="Port number for IRIS connection.", + default=1972, + ) + + IRIS_USER: str | None = Field( + description="Username for IRIS authentication.", + default="_SYSTEM", + ) + + IRIS_PASSWORD: str | None = Field( + description="Password for IRIS authentication.", + default="Dify@1234", + ) + + IRIS_SCHEMA: str | None = Field( + description="Schema name for IRIS tables.", + default="dify", + ) + + IRIS_DATABASE: str | None = Field( + description="Database namespace for IRIS connection.", + default="USER", + ) + + IRIS_CONNECTION_URL: str | None = Field( + description="Full connection URL for IRIS (overrides individual fields if provided).", + default=None, + ) + + IRIS_MIN_CONNECTION: PositiveInt = Field( + description="Minimum number of connections in the pool.", + default=1, + ) + + IRIS_MAX_CONNECTION: PositiveInt = Field( + description="Maximum number of connections in the pool.", + default=3, + ) + + IRIS_TEXT_INDEX: bool = Field( + description="Enable full-text search index using %iFind.Index.Basic.", + default=True, + ) + + IRIS_TEXT_INDEX_LANGUAGE: str = Field( + description="Language for full-text search index (e.g., 'en', 'ja', 'zh', 'de').", + default="en", + ) + + @model_validator(mode="before") + @classmethod + def validate_config(cls, values: dict) -> dict: + """Validate IRIS configuration values. + + Args: + values: Configuration dictionary + + Returns: + Validated configuration dictionary + + Raises: + ValueError: If required fields are missing or pool settings are invalid + """ + # Only validate required fields if IRIS is being used as the vector store + # This allows the config to be loaded even when IRIS is not in use + + # vector_store = os.environ.get("VECTOR_STORE", "") + # We rely on Pydantic defaults for required fields if they are missing from env. + # Strict existence check is removed to allow defaults to work. + + min_conn = values.get("IRIS_MIN_CONNECTION", 1) + max_conn = values.get("IRIS_MAX_CONNECTION", 3) + if min_conn > max_conn: + raise ValueError("IRIS_MIN_CONNECTION must be less than or equal to IRIS_MAX_CONNECTION") + + return values diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 70b6e932e9..8c4a4467a7 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -230,6 +230,7 @@ def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool VectorType.CLICKZETTA, VectorType.BAIDU, VectorType.ALIBABACLOUD_MYSQL, + VectorType.IRIS, } semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]} diff --git a/api/core/rag/datasource/vdb/iris/__init__.py b/api/core/rag/datasource/vdb/iris/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/rag/datasource/vdb/iris/iris_vector.py b/api/core/rag/datasource/vdb/iris/iris_vector.py new file mode 100644 index 0000000000..b1bfabb76e --- /dev/null +++ b/api/core/rag/datasource/vdb/iris/iris_vector.py @@ -0,0 +1,407 @@ +"""InterSystems IRIS vector database implementation for Dify. + +This module provides vector storage and retrieval using IRIS native VECTOR type +with HNSW indexing for efficient similarity search. +""" + +from __future__ import annotations + +import json +import logging +import threading +import uuid +from contextlib import contextmanager +from typing import TYPE_CHECKING, Any + +from configs import dify_config +from configs.middleware.vdb.iris_config import IrisVectorConfig +from core.rag.datasource.vdb.vector_base import BaseVector +from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory +from core.rag.datasource.vdb.vector_type import VectorType +from core.rag.embedding.embedding_base import Embeddings +from core.rag.models.document import Document +from extensions.ext_redis import redis_client +from models.dataset import Dataset + +if TYPE_CHECKING: + import iris +else: + try: + import iris + except ImportError: + iris = None # type: ignore[assignment] + +logger = logging.getLogger(__name__) + +# Singleton connection pool to minimize IRIS license usage +_pool_lock = threading.Lock() +_pool_instance: IrisConnectionPool | None = None + + +def get_iris_pool(config: IrisVectorConfig) -> IrisConnectionPool: + """Get or create the global IRIS connection pool (singleton pattern).""" + global _pool_instance # pylint: disable=global-statement + with _pool_lock: + if _pool_instance is None: + logger.info("Initializing IRIS connection pool") + _pool_instance = IrisConnectionPool(config) + return _pool_instance + + +class IrisConnectionPool: + """Thread-safe connection pool for IRIS database.""" + + def __init__(self, config: IrisVectorConfig) -> None: + self.config = config + self._pool: list[Any] = [] + self._lock = threading.Lock() + self._min_size = config.IRIS_MIN_CONNECTION + self._max_size = config.IRIS_MAX_CONNECTION + self._in_use = 0 + self._schemas_initialized: set[str] = set() # Cache for initialized schemas + self._initialize_pool() + + def _initialize_pool(self) -> None: + for _ in range(self._min_size): + self._pool.append(self._create_connection()) + + def _create_connection(self) -> Any: + return iris.connect( + hostname=self.config.IRIS_HOST, + port=self.config.IRIS_SUPER_SERVER_PORT, + namespace=self.config.IRIS_DATABASE, + username=self.config.IRIS_USER, + password=self.config.IRIS_PASSWORD, + ) + + def get_connection(self) -> Any: + """Get a connection from pool or create new if available.""" + with self._lock: + if self._pool: + conn = self._pool.pop() + self._in_use += 1 + return conn + if self._in_use < self._max_size: + conn = self._create_connection() + self._in_use += 1 + return conn + raise RuntimeError("Connection pool exhausted") + + def return_connection(self, conn: Any) -> None: + """Return connection to pool after validating it.""" + if not conn: + return + + # Validate connection health + is_valid = False + try: + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.close() + is_valid = True + except (OSError, RuntimeError) as e: + logger.debug("Connection validation failed: %s", e) + try: + conn.close() + except (OSError, RuntimeError): + pass + + with self._lock: + self._pool.append(conn if is_valid else self._create_connection()) + self._in_use -= 1 + + def ensure_schema_exists(self, schema: str) -> None: + """Ensure schema exists in IRIS database. + + This method is idempotent and thread-safe. It uses a memory cache to avoid + redundant database queries for already-verified schemas. + + Args: + schema: Schema name to ensure exists + + Raises: + Exception: If schema creation fails + """ + # Fast path: check cache first (no lock needed for read-only set lookup) + if schema in self._schemas_initialized: + return + + # Slow path: acquire lock and check again (double-checked locking) + with self._lock: + if schema in self._schemas_initialized: + return + + # Get a connection to check/create schema + conn = self._pool[0] if self._pool else self._create_connection() + cursor = conn.cursor() + try: + # Check if schema exists using INFORMATION_SCHEMA + check_sql = """ + SELECT COUNT(*) FROM INFORMATION_SCHEMA.SCHEMATA + WHERE SCHEMA_NAME = ? + """ + cursor.execute(check_sql, (schema,)) # Must be tuple or list + exists = cursor.fetchone()[0] > 0 + + if not exists: + # Schema doesn't exist, create it + cursor.execute(f"CREATE SCHEMA {schema}") + conn.commit() + logger.info("Created schema: %s", schema) + else: + logger.debug("Schema already exists: %s", schema) + + # Add to cache to skip future checks + self._schemas_initialized.add(schema) + + except Exception as e: + conn.rollback() + logger.exception("Failed to ensure schema %s exists", schema) + raise + finally: + cursor.close() + + def close_all(self) -> None: + """Close all connections (application shutdown only).""" + with self._lock: + for conn in self._pool: + try: + conn.close() + except (OSError, RuntimeError): + pass + self._pool.clear() + self._in_use = 0 + self._schemas_initialized.clear() + + +class IrisVector(BaseVector): + """IRIS vector database implementation using native VECTOR type and HNSW indexing.""" + + def __init__(self, collection_name: str, config: IrisVectorConfig) -> None: + super().__init__(collection_name) + self.config = config + self.table_name = f"embedding_{collection_name}".upper() + self.schema = config.IRIS_SCHEMA or "dify" + self.pool = get_iris_pool(config) + + def get_type(self) -> str: + return VectorType.IRIS + + @contextmanager + def _get_cursor(self): + """Context manager for database cursor with connection pooling.""" + conn = self.pool.get_connection() + cursor = conn.cursor() + try: + yield cursor + conn.commit() + except Exception: + conn.rollback() + raise + finally: + cursor.close() + self.pool.return_connection(conn) + + def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs) -> list[str]: + dimension = len(embeddings[0]) + self._create_collection(dimension) + return self.add_texts(texts, embeddings) + + def add_texts(self, documents: list[Document], embeddings: list[list[float]], **_kwargs) -> list[str]: + """Add documents with embeddings to the collection.""" + added_ids = [] + with self._get_cursor() as cursor: + for i, doc in enumerate(documents): + doc_id = doc.metadata.get("doc_id", str(uuid.uuid4())) if doc.metadata else str(uuid.uuid4()) + metadata = json.dumps(doc.metadata) if doc.metadata else "{}" + embedding_str = json.dumps(embeddings[i]) + + sql = f"INSERT INTO {self.schema}.{self.table_name} (id, text, meta, embedding) VALUES (?, ?, ?, ?)" + cursor.execute(sql, (doc_id, doc.page_content, metadata, embedding_str)) + added_ids.append(doc_id) + + return added_ids + + def text_exists(self, id: str) -> bool: # pylint: disable=redefined-builtin + try: + with self._get_cursor() as cursor: + sql = f"SELECT 1 FROM {self.schema}.{self.table_name} WHERE id = ?" + cursor.execute(sql, (id,)) + return cursor.fetchone() is not None + except (OSError, RuntimeError, ValueError): + return False + + def delete_by_ids(self, ids: list[str]) -> None: + if not ids: + return + + with self._get_cursor() as cursor: + placeholders = ",".join(["?" for _ in ids]) + sql = f"DELETE FROM {self.schema}.{self.table_name} WHERE id IN ({placeholders})" + cursor.execute(sql, ids) + + def delete_by_metadata_field(self, key: str, value: str) -> None: + """Delete documents by metadata field (JSON LIKE pattern matching).""" + with self._get_cursor() as cursor: + pattern = f'%"{key}": "{value}"%' + sql = f"DELETE FROM {self.schema}.{self.table_name} WHERE meta LIKE ?" + cursor.execute(sql, (pattern,)) + + def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]: + """Search similar documents using VECTOR_COSINE with HNSW index.""" + top_k = kwargs.get("top_k", 4) + score_threshold = float(kwargs.get("score_threshold") or 0.0) + embedding_str = json.dumps(query_vector) + + with self._get_cursor() as cursor: + sql = f""" + SELECT TOP {top_k} id, text, meta, VECTOR_COSINE(embedding, ?) as score + FROM {self.schema}.{self.table_name} + ORDER BY score DESC + """ + cursor.execute(sql, (embedding_str,)) + + docs = [] + for row in cursor.fetchall(): + if len(row) >= 4: + text, meta_str, score = row[1], row[2], float(row[3]) + if score >= score_threshold: + metadata = json.loads(meta_str) if meta_str else {} + metadata["score"] = score + docs.append(Document(page_content=text, metadata=metadata)) + return docs + + def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: + """Search documents by full-text using iFind index or fallback to LIKE search.""" + top_k = kwargs.get("top_k", 5) + + with self._get_cursor() as cursor: + if self.config.IRIS_TEXT_INDEX: + # Use iFind full-text search with index + text_index_name = f"idx_{self.table_name}_text" + sql = f""" + SELECT TOP {top_k} id, text, meta + FROM {self.schema}.{self.table_name} + WHERE %ID %FIND search_index({text_index_name}, ?) + """ + cursor.execute(sql, (query,)) + else: + # Fallback to LIKE search (inefficient for large datasets) + query_pattern = f"%{query}%" + sql = f""" + SELECT TOP {top_k} id, text, meta + FROM {self.schema}.{self.table_name} + WHERE text LIKE ? + """ + cursor.execute(sql, (query_pattern,)) + + docs = [] + for row in cursor.fetchall(): + if len(row) >= 3: + metadata = json.loads(row[2]) if row[2] else {} + docs.append(Document(page_content=row[1], metadata=metadata)) + + if not docs: + logger.info("Full-text search for '%s' returned no results", query) + + return docs + + def delete(self) -> None: + """Delete the entire collection (drop table - permanent).""" + with self._get_cursor() as cursor: + sql = f"DROP TABLE {self.schema}.{self.table_name}" + cursor.execute(sql) + + def _create_collection(self, dimension: int) -> None: + """Create table with VECTOR column and HNSW index. + + Uses Redis lock to prevent concurrent creation attempts across multiple + API server instances (api, worker, worker_beat). + """ + cache_key = f"vector_indexing_{self._collection_name}" + lock_name = f"{cache_key}_lock" + + with redis_client.lock(lock_name, timeout=20): # pylint: disable=not-context-manager + if redis_client.get(cache_key): + return + + # Ensure schema exists (idempotent, cached after first call) + self.pool.ensure_schema_exists(self.schema) + + with self._get_cursor() as cursor: + # Create table with VECTOR column + sql = f""" + CREATE TABLE {self.schema}.{self.table_name} ( + id VARCHAR(255) PRIMARY KEY, + text CLOB, + meta CLOB, + embedding VECTOR(DOUBLE, {dimension}) + ) + """ + logger.info("Creating table: %s.%s", self.schema, self.table_name) + cursor.execute(sql) + + # Create HNSW index for vector similarity search + index_name = f"idx_{self.table_name}_embedding" + sql_index = ( + f"CREATE INDEX {index_name} ON {self.schema}.{self.table_name} " + "(embedding) AS HNSW(Distance='Cosine')" + ) + logger.info("Creating HNSW index: %s", index_name) + cursor.execute(sql_index) + logger.info("HNSW index created successfully: %s", index_name) + + # Create full-text search index if enabled + logger.info( + "IRIS_TEXT_INDEX config value: %s (type: %s)", + self.config.IRIS_TEXT_INDEX, + type(self.config.IRIS_TEXT_INDEX), + ) + if self.config.IRIS_TEXT_INDEX: + text_index_name = f"idx_{self.table_name}_text" + language = self.config.IRIS_TEXT_INDEX_LANGUAGE + # Fixed: Removed extra parentheses and corrected syntax + sql_text_index = f""" + CREATE INDEX {text_index_name} ON {self.schema}.{self.table_name} (text) + AS %iFind.Index.Basic + (LANGUAGE = '{language}', LOWER = 1, INDEXOPTION = 0) + """ + logger.info("Creating text index: %s with language: %s", text_index_name, language) + logger.info("SQL for text index: %s", sql_text_index) + cursor.execute(sql_text_index) + logger.info("Text index created successfully: %s", text_index_name) + else: + logger.warning("Text index creation skipped - IRIS_TEXT_INDEX is disabled") + + redis_client.set(cache_key, 1, ex=3600) + + +class IrisVectorFactory(AbstractVectorFactory): + """Factory for creating IrisVector instances.""" + + def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> IrisVector: + if dataset.index_struct_dict: + class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"] + collection_name = class_prefix + else: + dataset_id = dataset.id + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + index_struct_dict = self.gen_index_struct_dict(VectorType.IRIS, collection_name) + dataset.index_struct = json.dumps(index_struct_dict) + + return IrisVector( + collection_name=collection_name, + config=IrisVectorConfig( + IRIS_HOST=dify_config.IRIS_HOST, + IRIS_SUPER_SERVER_PORT=dify_config.IRIS_SUPER_SERVER_PORT, + IRIS_USER=dify_config.IRIS_USER, + IRIS_PASSWORD=dify_config.IRIS_PASSWORD, + IRIS_DATABASE=dify_config.IRIS_DATABASE, + IRIS_SCHEMA=dify_config.IRIS_SCHEMA, + IRIS_CONNECTION_URL=dify_config.IRIS_CONNECTION_URL, + IRIS_MIN_CONNECTION=dify_config.IRIS_MIN_CONNECTION, + IRIS_MAX_CONNECTION=dify_config.IRIS_MAX_CONNECTION, + IRIS_TEXT_INDEX=dify_config.IRIS_TEXT_INDEX, + IRIS_TEXT_INDEX_LANGUAGE=dify_config.IRIS_TEXT_INDEX_LANGUAGE, + ), + ) diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 3a47241293..9573b491a5 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -187,6 +187,10 @@ class Vector: from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory return ClickzettaVectorFactory + case VectorType.IRIS: + from core.rag.datasource.vdb.iris.iris_vector import IrisVectorFactory + + return IrisVectorFactory case _: raise ValueError(f"Vector store {vector_type} is not supported.") diff --git a/api/core/rag/datasource/vdb/vector_type.py b/api/core/rag/datasource/vdb/vector_type.py index bc7d93a2e0..263d22195e 100644 --- a/api/core/rag/datasource/vdb/vector_type.py +++ b/api/core/rag/datasource/vdb/vector_type.py @@ -32,3 +32,4 @@ class VectorType(StrEnum): HUAWEI_CLOUD = "huawei_cloud" MATRIXONE = "matrixone" CLICKZETTA = "clickzetta" + IRIS = "iris" diff --git a/api/pyproject.toml b/api/pyproject.toml index 092b5ab9f9..4fbd7433d1 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -216,6 +216,7 @@ vdb = [ "pymochow==2.2.9", "pyobvector~=0.2.17", "qdrant-client==1.9.0", + "intersystems-irispython>=5.1.0", "tablestore==6.3.7", "tcvectordb~=1.6.4", "tidb-vector==0.0.9", diff --git a/api/tests/integration_tests/.env.example b/api/tests/integration_tests/.env.example index e508ceef66..acc268f1d4 100644 --- a/api/tests/integration_tests/.env.example +++ b/api/tests/integration_tests/.env.example @@ -55,7 +55,7 @@ WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,* CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,* # Vector database configuration -# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase +# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase, iris VECTOR_STORE=weaviate # Weaviate configuration WEAVIATE_ENDPOINT=http://localhost:8080 @@ -64,6 +64,20 @@ WEAVIATE_GRPC_ENABLED=false WEAVIATE_BATCH_SIZE=100 WEAVIATE_TOKENIZATION=word +# InterSystems IRIS configuration +IRIS_HOST=localhost +IRIS_SUPER_SERVER_PORT=1972 +IRIS_WEB_SERVER_PORT=52773 +IRIS_USER=_SYSTEM +IRIS_PASSWORD=Dify@1234 +IRIS_DATABASE=USER +IRIS_SCHEMA=dify +IRIS_CONNECTION_URL= +IRIS_MIN_CONNECTION=1 +IRIS_MAX_CONNECTION=3 +IRIS_TEXT_INDEX=true +IRIS_TEXT_INDEX_LANGUAGE=en + # Upload configuration UPLOAD_FILE_SIZE_LIMIT=15 diff --git a/api/tests/integration_tests/vdb/iris/__init__.py b/api/tests/integration_tests/vdb/iris/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/iris/test_iris.py b/api/tests/integration_tests/vdb/iris/test_iris.py new file mode 100644 index 0000000000..49f6857743 --- /dev/null +++ b/api/tests/integration_tests/vdb/iris/test_iris.py @@ -0,0 +1,44 @@ +"""Integration tests for IRIS vector database.""" + +from core.rag.datasource.vdb.iris.iris_vector import IrisVector, IrisVectorConfig +from tests.integration_tests.vdb.test_vector_store import ( + AbstractVectorTest, + setup_mock_redis, +) + + +class IrisVectorTest(AbstractVectorTest): + """Test suite for IRIS vector store implementation.""" + + def __init__(self): + """Initialize IRIS vector test with hardcoded test configuration. + + Note: Uses 'host.docker.internal' to connect from DevContainer to + host OS Docker, or 'localhost' when running directly on host OS. + """ + super().__init__() + self.vector = IrisVector( + collection_name=self.collection_name, + config=IrisVectorConfig( + IRIS_HOST="host.docker.internal", + IRIS_SUPER_SERVER_PORT=1972, + IRIS_USER="_SYSTEM", + IRIS_PASSWORD="Dify@1234", + IRIS_DATABASE="USER", + IRIS_SCHEMA="dify", + IRIS_CONNECTION_URL=None, + IRIS_MIN_CONNECTION=1, + IRIS_MAX_CONNECTION=3, + IRIS_TEXT_INDEX=True, + IRIS_TEXT_INDEX_LANGUAGE="en", + ), + ) + + +def test_iris_vector(setup_mock_redis) -> None: + """Run all IRIS vector store tests. + + Args: + setup_mock_redis: Pytest fixture for mock Redis setup + """ + IrisVectorTest().run_all_tests() diff --git a/api/uv.lock b/api/uv.lock index 8cd49d057f..e6a6cf8ffc 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1515,6 +1515,7 @@ vdb = [ { name = "clickzetta-connector-python" }, { name = "couchbase" }, { name = "elasticsearch" }, + { name = "intersystems-irispython" }, { name = "mo-vector" }, { name = "mysql-connector-python" }, { name = "opensearch-py" }, @@ -1711,6 +1712,7 @@ vdb = [ { name = "clickzetta-connector-python", specifier = ">=0.8.102" }, { name = "couchbase", specifier = "~=4.3.0" }, { name = "elasticsearch", specifier = "==8.14.0" }, + { name = "intersystems-irispython", specifier = ">=5.1.0" }, { name = "mo-vector", specifier = "~=0.1.13" }, { name = "mysql-connector-python", specifier = ">=9.3.0" }, { name = "opensearch-py", specifier = "==2.4.0" }, @@ -2918,6 +2920,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 }, ] +[[package]] +name = "intersystems-irispython" +version = "5.3.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/56/16d93576b50408d97a5cbbd055d8da024d585e96a360e2adc95b41ae6284/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-macosx_10_9_universal2.whl", hash = "sha256:59d3176a35867a55b1ab69a6b5c75438b460291bccb254c2d2f4173be08b6e55", size = 6594480, upload-time = "2025-10-09T20:47:27.629Z" }, + { url = "https://files.pythonhosted.org/packages/99/bc/19e144ee805ea6ee0df6342a711e722c84347c05a75b3bf040c5fbe19982/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56bccefd1997c25f9f9f6c4086214c18d4fdaac0a93319d4b21dd9a6c59c9e51", size = 14779928, upload-time = "2025-10-09T20:47:30.564Z" }, + { url = "https://files.pythonhosted.org/packages/e6/fb/59ba563a80b39e9450b4627b5696019aa831dce27dacc3831b8c1e669102/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e160adc0785c55bb64e4264b8e99075691a15b0afa5d8d529f1b4bac7e57b81", size = 14422035, upload-time = "2025-10-09T20:47:32.552Z" }, + { url = "https://files.pythonhosted.org/packages/c1/68/ade8ad43f0ed1e5fba60e1710fa5ddeb01285f031e465e8c006329072e63/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-win32.whl", hash = "sha256:820f2c5729119e5173a5bf6d6ac2a41275c4f1ffba6af6c59ea313ecd8f499cc", size = 2824316, upload-time = "2025-10-09T20:47:28.998Z" }, + { url = "https://files.pythonhosted.org/packages/f4/03/cd45cb94e42c01dc525efebf3c562543a18ee55b67fde4022665ca672351/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-win_amd64.whl", hash = "sha256:fc07ec24bc50b6f01573221cd7d86f2937549effe31c24af8db118e0131e340c", size = 3463297, upload-time = "2025-10-09T20:47:34.636Z" }, +] + [[package]] name = "intervaltree" version = "3.1.0" diff --git a/docker/.env.example b/docker/.env.example index 04088b72a8..0227f75b70 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -518,7 +518,7 @@ SUPABASE_URL=your-server-url # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`. +# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -792,6 +792,21 @@ CLICKZETTA_ANALYZER_TYPE=chinese CLICKZETTA_ANALYZER_MODE=smart CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance +# InterSystems IRIS configuration, only available when VECTOR_STORE is `iris` +IRIS_HOST=iris +IRIS_SUPER_SERVER_PORT=1972 +IRIS_WEB_SERVER_PORT=52773 +IRIS_USER=_SYSTEM +IRIS_PASSWORD=Dify@1234 +IRIS_DATABASE=USER +IRIS_SCHEMA=dify +IRIS_CONNECTION_URL= +IRIS_MIN_CONNECTION=1 +IRIS_MAX_CONNECTION=3 +IRIS_TEXT_INDEX=true +IRIS_TEXT_INDEX_LANGUAGE=en +IRIS_TIMEZONE=UTC + # ------------------------------ # Knowledge Configuration # ------------------------------ diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index b12d06ca97..6ba3409288 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -648,6 +648,26 @@ services: CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE} + # InterSystems IRIS vector database + iris: + image: containers.intersystems.com/intersystems/iris-community:2025.3 + profiles: + - iris + container_name: iris + restart: always + init: true + ports: + - "${IRIS_SUPER_SERVER_PORT:-1972}:1972" + - "${IRIS_WEB_SERVER_PORT:-52773}:52773" + volumes: + - ./volumes/iris:/opt/iris + - ./iris/iris-init.script:/iris-init.script + - ./iris/docker-entrypoint.sh:/custom-entrypoint.sh + entrypoint: ["/custom-entrypoint.sh"] + tty: true + environment: + TZ: ${IRIS_TIMEZONE:-UTC} + # Oracle vector database oracle: image: container-registry.oracle.com/database/free:latest diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 825f0650c8..71c0a5e687 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -361,6 +361,19 @@ x-shared-env: &shared-api-worker-env CLICKZETTA_ANALYZER_TYPE: ${CLICKZETTA_ANALYZER_TYPE:-chinese} CLICKZETTA_ANALYZER_MODE: ${CLICKZETTA_ANALYZER_MODE:-smart} CLICKZETTA_VECTOR_DISTANCE_FUNCTION: ${CLICKZETTA_VECTOR_DISTANCE_FUNCTION:-cosine_distance} + IRIS_HOST: ${IRIS_HOST:-iris} + IRIS_SUPER_SERVER_PORT: ${IRIS_SUPER_SERVER_PORT:-1972} + IRIS_WEB_SERVER_PORT: ${IRIS_WEB_SERVER_PORT:-52773} + IRIS_USER: ${IRIS_USER:-_SYSTEM} + IRIS_PASSWORD: ${IRIS_PASSWORD:-Dify@1234} + IRIS_DATABASE: ${IRIS_DATABASE:-USER} + IRIS_SCHEMA: ${IRIS_SCHEMA:-dify} + IRIS_CONNECTION_URL: ${IRIS_CONNECTION_URL:-} + IRIS_MIN_CONNECTION: ${IRIS_MIN_CONNECTION:-1} + IRIS_MAX_CONNECTION: ${IRIS_MAX_CONNECTION:-3} + IRIS_TEXT_INDEX: ${IRIS_TEXT_INDEX:-true} + IRIS_TEXT_INDEX_LANGUAGE: ${IRIS_TEXT_INDEX_LANGUAGE:-en} + IRIS_TIMEZONE: ${IRIS_TIMEZONE:-UTC} UPLOAD_FILE_SIZE_LIMIT: ${UPLOAD_FILE_SIZE_LIMIT:-15} UPLOAD_FILE_BATCH_LIMIT: ${UPLOAD_FILE_BATCH_LIMIT:-5} UPLOAD_FILE_EXTENSION_BLACKLIST: ${UPLOAD_FILE_EXTENSION_BLACKLIST:-} @@ -1286,6 +1299,26 @@ services: CHROMA_SERVER_AUTHN_PROVIDER: ${CHROMA_SERVER_AUTHN_PROVIDER:-chromadb.auth.token_authn.TokenAuthenticationServerProvider} IS_PERSISTENT: ${CHROMA_IS_PERSISTENT:-TRUE} + # InterSystems IRIS vector database + iris: + image: containers.intersystems.com/intersystems/iris-community:2025.3 + profiles: + - iris + container_name: iris + restart: always + init: true + ports: + - "${IRIS_SUPER_SERVER_PORT:-1972}:1972" + - "${IRIS_WEB_SERVER_PORT:-52773}:52773" + volumes: + - ./volumes/iris:/opt/iris + - ./iris/iris-init.script:/iris-init.script + - ./iris/docker-entrypoint.sh:/custom-entrypoint.sh + entrypoint: ["/custom-entrypoint.sh"] + tty: true + environment: + TZ: ${IRIS_TIMEZONE:-UTC} + # Oracle vector database oracle: image: container-registry.oracle.com/database/free:latest diff --git a/docker/iris/docker-entrypoint.sh b/docker/iris/docker-entrypoint.sh new file mode 100755 index 0000000000..067bfa03e2 --- /dev/null +++ b/docker/iris/docker-entrypoint.sh @@ -0,0 +1,38 @@ +#!/bin/bash +set -e + +# IRIS configuration flag file +IRIS_CONFIG_DONE="/opt/iris/.iris-configured" + +# Function to configure IRIS +configure_iris() { + echo "Configuring IRIS for first-time setup..." + + # Wait for IRIS to be fully started + sleep 5 + + # Execute the initialization script + iris session IRIS < /iris-init.script + + # Mark configuration as done + touch "$IRIS_CONFIG_DONE" + + echo "IRIS configuration completed." +} + +# Start IRIS in background for initial configuration if not already configured +if [ ! -f "$IRIS_CONFIG_DONE" ]; then + echo "First-time IRIS setup detected. Starting IRIS for configuration..." + + # Start IRIS + iris start IRIS + + # Configure IRIS + configure_iris + + # Stop IRIS + iris stop IRIS quietly +fi + +# Run the original IRIS entrypoint +exec /iris-main "$@" diff --git a/docker/iris/iris-init.script b/docker/iris/iris-init.script new file mode 100644 index 0000000000..c41fcf4efb --- /dev/null +++ b/docker/iris/iris-init.script @@ -0,0 +1,11 @@ +// Switch to the %SYS namespace to modify system settings +set $namespace="%SYS" + +// Set predefined user passwords to never expire (default password: SYS) +Do ##class(Security.Users).UnExpireUserPasswords("*") + +// Change the default password  +Do $SYSTEM.Security.ChangePassword("_SYSTEM","Dify@1234") + +// Install the Japanese locale (default is English since the container is Ubuntu-based) +// Do ##class(Config.NLS.Locales).Install("jpuw")