fix: implement score_threshold filtering for OceanBase vector search (#28536)

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Conner Mo
2025-11-29 18:50:21 +08:00
committed by GitHub
parent 0a2d478749
commit acbc886ecd

View File

@@ -270,6 +270,10 @@ class OceanBaseVector(BaseVector):
self._client.set_ob_hnsw_ef_search(ef_search)
self._hnsw_ef_search = ef_search
topk = kwargs.get("top_k", 10)
try:
score_threshold = float(val) if (val := kwargs.get("score_threshold")) is not None else 0.0
except (ValueError, TypeError) as e:
raise ValueError(f"Invalid score_threshold parameter: {e}") from e
try:
cur = self._client.ann_search(
table_name=self._collection_name,
@@ -285,14 +289,20 @@ class OceanBaseVector(BaseVector):
raise Exception("Failed to search by vector. ", e)
docs = []
for _text, metadata, distance in cur:
metadata = json.loads(metadata)
metadata["score"] = 1 - distance / math.sqrt(2)
docs.append(
Document(
page_content=_text,
metadata=metadata,
score = 1 - distance / math.sqrt(2)
if score >= score_threshold:
try:
metadata = json.loads(metadata)
except json.JSONDecodeError:
logger.warning("Invalid JSON metadata: %s", metadata)
metadata = {}
metadata["score"] = score
docs.append(
Document(
page_content=_text,
metadata=metadata,
)
)
)
return docs
def delete(self):