1
0
mirror of synced 2025-12-25 02:09:19 -05:00

Bug: Fix issue with Pinecone custom namespaces not being created automatically (#38336)

This commit is contained in:
Bindi Pankhudi
2024-05-17 17:25:30 -07:00
committed by GitHub
parent 9e2b057e8c
commit b7de9f1587
6 changed files with 73 additions and 6 deletions

View File

@@ -137,7 +137,9 @@ class PineconeIndexer(Indexer):
for batch in serial_batches:
async_results = []
for ids_vectors_chunk in create_chunks(batch, batch_size=PINECONE_BATCH_SIZE):
async_result = self.pinecone_index.upsert(vectors=ids_vectors_chunk, async_req=True, show_progress=False)
async_result = self.pinecone_index.upsert(
vectors=ids_vectors_chunk, async_req=True, show_progress=False, namespace=namespace
)
async_results.append(async_result)
# Wait for and retrieve responses (this raises in case of error)
[async_result.result() for async_result in async_results]

View File

@@ -9,7 +9,18 @@ import time
from airbyte_cdk.destinations.vector_db_based.embedder import OPEN_AI_VECTOR_SIZE
from airbyte_cdk.destinations.vector_db_based.test_utils import BaseIntegrationTest
from airbyte_cdk.models import DestinationSyncMode, Status
from airbyte_cdk.models import (
AirbyteMessage,
AirbyteRecordMessage,
AirbyteStateMessage,
AirbyteStream,
ConfiguredAirbyteCatalog,
ConfiguredAirbyteStream,
DestinationSyncMode,
Status,
SyncMode,
Type,
)
from destination_pinecone.destination import DestinationPinecone
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
@@ -47,7 +58,14 @@ class PineconeIntegrationTest(BaseIntegrationTest):
if "Namespace not found" not in str(e):
raise(e)
else :
print("Noting to delete. No data in the index/namespace.")
print("Nothing to delete in default namespace. No data in the index/namespace.")
try:
self.pinecone_index.delete(delete_all=True, namespace="ns1")
except PineconeException as e:
if "Namespace not found" not in str(e):
raise(e)
else :
print("Nothing to delete in ns1 namespace. No data in the index/namespace.")
def test_integration_test_flag_is_set(self):
assert "PYTEST_CURRENT_TEST" in os.environ
@@ -107,3 +125,44 @@ class PineconeIntegrationTest(BaseIntegrationTest):
vector_store = Pinecone(self.pinecone_index_rest, embeddings.embed_query, "text")
result = vector_store.similarity_search("feline animals", 1)
assert result[0].metadata["_ab_record_id"] == "mystream_2"
def test_write_with_namespace(self):
    """Write five records for a namespaced stream and check they land in ``ns1``.

    The catalog and the records both carry the namespace ``ns1``. After the
    sync, the index must report five vectors in total and five vectors inside
    the ``ns1`` namespace specifically.
    """
    catalog = self._get_configured_catalog_with_namespace(DestinationSyncMode.overwrite)
    first_state_message = self._state({"state": "1"})
    first_record_chunk = [self._record_with_namespace("mystream", f"Dogs are number {i}", i) for i in range(5)]

    # initial sync; list() consumes everything the destination returns from write()
    destination = DestinationPinecone()
    list(destination.write(self.config, catalog, [*first_record_chunk, first_state_message]))

    # NOTE(review): presumably gives the index time to reflect the writes - confirm in _wait()
    self._wait()
    stats = self.pinecone_index.describe_index_stats()
    assert stats.total_vector_count == 5
    # The index-wide total would also be 5 if the vectors had been written to the
    # default namespace, so verify the per-namespace count as well.
    assert "ns1" in stats.namespaces
    assert stats.namespaces["ns1"].vector_count == 5
def _get_configured_catalog_with_namespace(self, destination_mode: DestinationSyncMode) -> ConfiguredAirbyteCatalog:
    """Return a catalog with one stream, ``mystream``, in namespace ``ns1``.

    The stream is configured for incremental sync with ``int_col`` as primary
    key; ``destination_mode`` is used as its destination sync mode.
    """
    # NOTE(review): "str" is not a JSON Schema type name ("string" is) - kept as-is, confirm intent
    column_types = {"str_col": "str", "int_col": "integer", "random_col": "integer"}
    schema = {
        "type": "object",
        "properties": {column: {"type": type_name} for column, type_name in column_types.items()},
    }
    namespaced_stream = AirbyteStream(
        name="mystream",
        namespace="ns1",
        json_schema=schema,
        supported_sync_modes=[SyncMode.incremental, SyncMode.full_refresh],
    )
    configured_stream = ConfiguredAirbyteStream(
        stream=namespaced_stream,
        primary_key=[["int_col"]],
        sync_mode=SyncMode.incremental,
        destination_sync_mode=destination_mode,
    )
    return ConfiguredAirbyteCatalog(streams=[configured_stream])
def _record_with_namespace(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage:
    """Wrap the two column values in a RECORD message for ``stream`` in namespace ``ns1``."""
    payload = {"str_col": str_value, "int_col": int_value}
    record = AirbyteRecordMessage(stream=stream, namespace="ns1", data=payload, emitted_at=0)
    return AirbyteMessage(type=Type.RECORD, record=record)

View File

@@ -13,7 +13,7 @@ data:
connectorSubtype: vectorstore
connectorType: destination
definitionId: 3d2b6f84-7f0d-4e3f-a5e5-7c7d4b50eabd
dockerImageTag: 0.1.1
dockerImageTag: 0.1.2
dockerRepository: airbyte/destination-pinecone
documentationUrl: https://docs.airbyte.com/integrations/destinations/pinecone
githubIssueLabel: destination-pinecone

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "airbyte-destination-pinecone"
version = "0.1.1"
version = "0.1.2"
description = "Airbyte destination implementation for Pinecone."
authors = ["Airbyte <contact@airbyte.io>"]
license = "MIT"

View File

@@ -105,7 +105,8 @@ def test_pinecone_index_upsert_and_delete(mock_describe_index):
(ANY, [4, 5, 6], {"_ab_stream": "abc", "text": "test2"}),
),
async_req=True,
show_progress=False
show_progress=False,
namespace="ns1",
)
@@ -139,6 +140,7 @@ def test_pinecone_index_upsert_and_delete_starter(mock_describe_index, mock_dete
),
async_req=True,
show_progress=False,
namespace="ns1",
)
def test_pinecone_index_upsert_and_delete_pod(mock_describe_index, mock_determine_spec_type):
@@ -168,6 +170,7 @@ def test_pinecone_index_upsert_and_delete_pod(mock_describe_index, mock_determin
),
async_req=True,
show_progress=False,
namespace="ns1",
)
def test_pinecone_index_upsert_and_delete_serverless(mock_describe_index, mock_determine_spec_type):
@@ -197,6 +200,7 @@ def test_pinecone_index_upsert_and_delete_serverless(mock_describe_index, mock_d
),
async_req=True,
show_progress=False,
namespace="ns1",
)
@@ -356,4 +360,5 @@ def test_metadata_normalization():
vectors=((ANY, [1, 2, 3], {"_ab_stream": "abc", "text": "test", "small": "a", "id": 1}),),
async_req=True,
show_progress=False,
namespace=None,
)

View File

@@ -76,6 +76,7 @@ OpenAI and Fake embeddings produce vectors with 1536 dimensions, and the Cohere
| Version | Date | Pull Request | Subject |
| :------ | :--------- | :-------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------- |
| 0.1.2 | 2024-05-17 | [#38336](https://github.com/airbytehq/airbyte/pull/38336) | Fix for regression: Custom namespaces not created automatically |
| 0.1.1 | 2023-05-14 | [#38151](https://github.com/airbytehq/airbyte/pull/38151) | Add airbyte source tag for attribution |
| 0.1.0 | 2023-05-06 | [#37756](https://github.com/airbytehq/airbyte/pull/37756) | Add support for Pinecone Serverless |
| 0.0.24 | 2023-04-15 | [#37333](https://github.com/airbytehq/airbyte/pull/37333) | Update CDK & pytest version to fix security vulnerabilities. |