chore: add virtual environment to the repository
- Add the backend_service/venv virtual environment
- Includes all Python dependency packages
- Note: the virtual environment is about 393 MB and contains 12,655 files
@@ -0,0 +1,492 @@
from typing import TYPE_CHECKING, Optional, Union, List, cast

from chromadb.api.types import (
    URI,
    CollectionMetadata,
    Embedding,
    PyEmbedding,
    Include,
    Metadata,
    Document,
    Image,
    Where,
    IDs,
    GetResult,
    QueryResult,
    ID,
    OneOrMany,
    WhereDocument,
    SearchResult,
    maybe_cast_one_to_many,
)

from chromadb.api.models.CollectionCommon import CollectionCommon
from chromadb.api.collection_configuration import UpdateCollectionConfiguration
from chromadb.execution.expression.plan import Search

if TYPE_CHECKING:
    from chromadb.api import AsyncServerAPI  # noqa: F401


class AsyncCollection(CollectionCommon["AsyncServerAPI"]):
    async def add(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Add embeddings to the data store.
        Args:
            ids: The ids of the embeddings you wish to add
            embeddings: The embeddings to add. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.
            images: The images to associate with the embeddings. Optional.
            uris: The uris of the images to associate with the embeddings. Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either embeddings or documents
            ValueError: If the length of ids, embeddings, metadatas, or documents don't match
            ValueError: If you don't provide an embedding function and don't provide embeddings
            ValueError: If you provide both embeddings and documents
            ValueError: If you provide an id that already exists

        """
        add_request = self._validate_and_prepare_add_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        await self._client._add(
            collection_id=self.id,
            ids=add_request["ids"],
            embeddings=add_request["embeddings"],
            metadatas=add_request["metadatas"],
            documents=add_request["documents"],
            uris=add_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    async def count(self) -> int:
        """The total number of embeddings added to the database

        Returns:
            int: The total number of embeddings added to the database

        """
        return await self._client._count(
            collection_id=self.id,
            tenant=self.tenant,
            database=self.database,
        )

    async def get(
        self,
        ids: Optional[OneOrMany[ID]] = None,
        where: Optional[Where] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents"],
    ) -> GetResult:
        """Get embeddings and their associated data from the data store. If no ids or where filter is provided, returns
        all embeddings up to limit starting at offset.

        Args:
            ids: The ids of the embeddings to get. Optional.
            where: A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            limit: The number of documents to return. Optional.
            offset: The offset to start returning results from. Useful for paging results with limit. Optional.
            where_document: A WhereDocument type dict used to filter by the documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`. Ids are always included. Defaults to `["metadatas", "documents"]`. Optional.

        Returns:
            GetResult: A GetResult object containing the results.

        """
        get_request = self._validate_and_prepare_get_request(
            ids=ids,
            where=where,
            where_document=where_document,
            include=include,
        )

        get_results = await self._client._get(
            collection_id=self.id,
            ids=get_request["ids"],
            where=get_request["where"],
            where_document=get_request["where_document"],
            include=get_request["include"],
            limit=limit,
            offset=offset,
            tenant=self.tenant,
            database=self.database,
        )

        return self._transform_get_response(
            response=get_results, include=get_request["include"]
        )

    async def peek(self, limit: int = 10) -> GetResult:
        """Get the first few results in the database up to limit

        Args:
            limit: The number of results to return.

        Returns:
            GetResult: A GetResult object containing the results.
        """
        return self._transform_peek_response(
            await self._client._peek(
                collection_id=self.id,
                n=limit,
                tenant=self.tenant,
                database=self.database,
            )
        )

    async def query(
        self,
        query_embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        query_texts: Optional[OneOrMany[Document]] = None,
        query_images: Optional[OneOrMany[Image]] = None,
        query_uris: Optional[OneOrMany[URI]] = None,
        ids: Optional[OneOrMany[ID]] = None,
        n_results: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = [
            "metadatas",
            "documents",
            "distances",
        ],
    ) -> QueryResult:
        """Get the n_results nearest neighbor embeddings for provided query_embeddings or query_texts.

        Args:
            query_embeddings: The embeddings to get the closest neighbors of. Optional.
            query_texts: The document texts to get the closest neighbors of. Optional.
            query_images: The images to get the closest neighbors of. Optional.
            query_uris: The URIs to be used with data loader. Optional.
            ids: A subset of ids to search within. Optional.
            n_results: The number of neighbors to return for each query_embedding or query_texts. Optional.
            where: A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            where_document: A WhereDocument type dict used to filter by the documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`. Ids are always included. Defaults to `["metadatas", "documents", "distances"]`. Optional.

        Returns:
            QueryResult: A QueryResult object containing the results.

        Raises:
            ValueError: If you don't provide either query_embeddings, query_texts, or query_images
            ValueError: If you provide both query_embeddings and query_texts
            ValueError: If you provide both query_embeddings and query_images
            ValueError: If you provide both query_texts and query_images

        """

        query_request = self._validate_and_prepare_query_request(
            query_embeddings=query_embeddings,
            query_texts=query_texts,
            query_images=query_images,
            query_uris=query_uris,
            ids=ids,
            n_results=n_results,
            where=where,
            where_document=where_document,
            include=include,
        )

        query_results = await self._client._query(
            collection_id=self.id,
            ids=query_request["ids"],
            query_embeddings=query_request["embeddings"],
            n_results=query_request["n_results"],
            where=query_request["where"],
            where_document=query_request["where_document"],
            include=query_request["include"],
            tenant=self.tenant,
            database=self.database,
        )

        return self._transform_query_response(
            response=query_results, include=query_request["include"]
        )

    async def modify(
        self,
        name: Optional[str] = None,
        metadata: Optional[CollectionMetadata] = None,
        configuration: Optional[UpdateCollectionConfiguration] = None,
    ) -> None:
        """Modify the collection name or metadata

        Args:
            name: The updated name for the collection. Optional.
            metadata: The updated metadata for the collection. Optional.

        Returns:
            None
        """

        self._validate_modify_request(metadata)

        # Note there is a race condition here where the metadata can be updated
        # but another thread sees the cached local metadata.
        # TODO: fixme
        await self._client._modify(
            id=self.id,
            new_name=name,
            new_metadata=metadata,
            new_configuration=configuration,
            tenant=self.tenant,
            database=self.database,
        )

        self._update_model_after_modify_success(name, metadata, configuration)

    async def fork(
        self,
        new_name: str,
    ) -> "AsyncCollection":
        """Fork the current collection under a new name. The returned collection should contain identical data to the current collection.
        This is an experimental API that only works for Hosted Chroma for now.

        Args:
            new_name: The name of the new collection.

        Returns:
            Collection: A new collection with the specified name and containing identical data to the current collection.
        """
        model = await self._client._fork(
            collection_id=self.id,
            new_name=new_name,
            tenant=self.tenant,
            database=self.database,
        )
        return AsyncCollection(
            client=self._client,
            model=model,
            embedding_function=self._embedding_function,
            data_loader=self._data_loader,
        )

    async def search(
        self,
        searches: OneOrMany[Search],
    ) -> SearchResult:
        """Perform hybrid search on the collection.
        This is an experimental API that only works for Hosted Chroma for now.

        Args:
            searches: A single Search object or a list of Search objects, each containing:
                - where: Where expression for filtering
                - rank: Ranking expression for hybrid search (defaults to Val(0.0))
                - limit: Limit configuration for pagination (defaults to no limit)
                - select: Select configuration for keys to return (defaults to empty)

        Returns:
            SearchResult: Column-major format response with:
                - ids: List of result IDs for each search payload
                - documents: Optional documents for each payload
                - embeddings: Optional embeddings for each payload
                - metadatas: Optional metadata for each payload
                - scores: Optional scores for each payload
                - select: List of selected keys for each payload

        Raises:
            NotImplementedError: For local/segment API implementations

        Examples:
            # Using builder pattern with Key constants
            from chromadb.execution.expression import (
                Search, Key, K, Knn, Val
            )

            # Note: K is an alias for Key, so K.DOCUMENT == Key.DOCUMENT
            search = (Search()
                .where((K("category") == "science") & (K("score") > 0.5))
                .rank(Knn(query=[0.1, 0.2, 0.3]) * 0.8 + Val(0.5) * 0.2)
                .limit(10, offset=0)
                .select(K.DOCUMENT, K.SCORE, "title"))

            # Direct construction
            from chromadb.execution.expression import (
                Search, Eq, And, Gt, Knn, Limit, Select, Key
            )

            search = Search(
                where=And([Eq("category", "science"), Gt("score", 0.5)]),
                rank=Knn(query=[0.1, 0.2, 0.3]),
                limit=Limit(offset=0, limit=10),
                select=Select(keys={Key.DOCUMENT, Key.SCORE, "title"})
            )

            # Single search
            result = await collection.search(search)

            # Multiple searches at once
            searches = [
                Search().where(K("type") == "article").rank(Knn(query=[0.1, 0.2])),
                Search().where(K("type") == "paper").rank(Knn(query=[0.3, 0.4]))
            ]
            results = await collection.search(searches)
        """
        # Convert single search to list for consistent handling
        searches_list = maybe_cast_one_to_many(searches)
        if searches_list is None:
            searches_list = []

        # Embed any string queries in Knn objects
        embedded_searches = [
            self._embed_search_string_queries(search) for search in searches_list
        ]

        return await self._client._search(
            collection_id=self.id,
            searches=cast(List[Search], embedded_searches),
            tenant=self.tenant,
            database=self.database,
        )

    async def update(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids.

        Args:
            ids: The ids of the embeddings to update
            embeddings: The embeddings to update. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.
            images: The images to associate with the embeddings. Optional.
        Returns:
            None
        """
        update_request = self._validate_and_prepare_update_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        await self._client._update(
            collection_id=self.id,
            ids=update_request["ids"],
            embeddings=update_request["embeddings"],
            metadatas=update_request["metadatas"],
            documents=update_request["documents"],
            uris=update_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    async def upsert(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist.

        Args:
            ids: The ids of the embeddings to update
            embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.

        Returns:
            None
        """
        upsert_request = self._validate_and_prepare_upsert_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        await self._client._upsert(
            collection_id=self.id,
            ids=upsert_request["ids"],
            embeddings=upsert_request["embeddings"],
            metadatas=upsert_request["metadatas"],
            documents=upsert_request["documents"],
            uris=upsert_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    async def delete(
        self,
        ids: Optional[IDs] = None,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
    ) -> None:
        """Delete the embeddings based on ids and/or a where filter

        Args:
            ids: The ids of the embeddings to delete
            where: A Where type dict used to filter the deletion by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            where_document: A WhereDocument type dict used to filter the deletion by the document content. E.g. `{"$contains": "hello"}`. Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either ids, where, or where_document
        """
        delete_request = self._validate_and_prepare_delete_request(
            ids, where, where_document
        )

        await self._client._delete(
            collection_id=self.id,
            ids=delete_request["ids"],
            where=delete_request["where"],
            where_document=delete_request["where_document"],
            tenant=self.tenant,
            database=self.database,
        )
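Not part of the diff: a minimal usage sketch for the AsyncCollection API in the file above. It assumes a locally running Chroma server reachable through chromadb.AsyncHttpClient and the collection's default embedding function; the host, port, collection name, and documents are illustrative.

# usage_sketch_async.py (illustrative, not included in this commit)
import asyncio

import chromadb


async def main() -> None:
    # Connect to a Chroma server; host/port are assumptions for this sketch.
    client = await chromadb.AsyncHttpClient(host="localhost", port=8000)
    collection = await client.get_or_create_collection(name="demo")

    # add() computes embeddings from `documents` because `embeddings` is omitted.
    await collection.add(
        ids=["doc-1", "doc-2"],
        documents=["red apples", "green pears"],
        metadatas=[{"color": "red"}, {"color": "green"}],
    )

    # query() returns the n_results nearest neighbors for each query text,
    # optionally filtered by metadata via `where`.
    results = await collection.query(
        query_texts=["fruit that is red"],
        n_results=1,
        where={"color": "red"},
    )
    print(results["ids"], results["documents"])


if __name__ == "__main__":
    asyncio.run(main())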
@@ -0,0 +1,101 @@
from typing import TYPE_CHECKING, Optional, Dict, Any
from uuid import UUID

if TYPE_CHECKING:
    from chromadb.api import ServerAPI  # noqa: F401


class AttachedFunction:
    """Represents a function attached to a collection."""

    def __init__(
        self,
        client: "ServerAPI",
        id: UUID,
        name: str,
        function_id: str,
        input_collection_id: UUID,
        output_collection: str,
        params: Optional[Dict[str, Any]],
        tenant: str,
        database: str,
    ):
        """Initialize an AttachedFunction.

        Args:
            client: The API client
            id: Unique identifier for this attached function
            name: Name of this attached function instance
            function_id: The function identifier (e.g., "record_counter")
            input_collection_id: ID of the input collection
            output_collection: Name of the output collection
            params: Function-specific parameters
            tenant: The tenant name
            database: The database name
        """
        self._client = client
        self._id = id
        self._name = name
        self._function_id = function_id
        self._input_collection_id = input_collection_id
        self._output_collection = output_collection
        self._params = params
        self._tenant = tenant
        self._database = database

    @property
    def id(self) -> UUID:
        """The unique identifier of this attached function."""
        return self._id

    @property
    def name(self) -> str:
        """The name of this attached function instance."""
        return self._name

    @property
    def function_id(self) -> str:
        """The function identifier."""
        return self._function_id

    @property
    def input_collection_id(self) -> UUID:
        """The ID of the input collection."""
        return self._input_collection_id

    @property
    def output_collection(self) -> str:
        """The name of the output collection."""
        return self._output_collection

    @property
    def params(self) -> Optional[Dict[str, Any]]:
        """The function parameters."""
        return self._params

    def detach(self, delete_output_collection: bool = False) -> bool:
        """Detach this function and prevent any further runs.

        Args:
            delete_output_collection: Whether to also delete the output collection. Defaults to False.

        Returns:
            bool: True if successful

        Example:
            >>> success = attached_fn.detach(delete_output_collection=True)
        """
        return self._client.detach_function(
            attached_function_id=self._id,
            delete_output=delete_output_collection,
            tenant=self._tenant,
            database=self._database,
        )

    def __repr__(self) -> str:
        return (
            f"AttachedFunction(id={self._id}, name='{self._name}', "
            f"function_id='{self._function_id}', "
            f"input_collection_id={self._input_collection_id}, "
            f"output_collection='{self._output_collection}')"
        )
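Not part of the diff: a short sketch of how the AttachedFunction handle above might be used. It assumes the experimental attached-function API is supported by the server and that Collection.attach_function (added later in this commit) returns an AttachedFunction; the function id and collection names are illustrative.

# usage_sketch_attached_function.py (illustrative, not included in this commit)
from chromadb.api.models.Collection import Collection


def attach_and_detach(collection: Collection) -> None:
    # attach_function() lives on Collection (see the next file in this diff)
    # and returns an AttachedFunction handle like the class above.
    attached_fn = collection.attach_function(
        function_id="record_counter",
        name="mycoll_stats_fn",
        output_collection="mycoll_stats",
    )

    # Read-only properties mirror the constructor arguments.
    print(attached_fn.id, attached_fn.function_id, attached_fn.output_collection)

    # Stop any further runs; optionally drop the output collection as well.
    attached_fn.detach(delete_output_collection=True)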
@@ -0,0 +1,535 @@
from typing import TYPE_CHECKING, Optional, Union, List, cast, Dict, Any

from chromadb.api.models.CollectionCommon import CollectionCommon
from chromadb.api.types import (
    URI,
    CollectionMetadata,
    Embedding,
    PyEmbedding,
    Include,
    Metadata,
    Document,
    Image,
    Where,
    IDs,
    GetResult,
    QueryResult,
    ID,
    OneOrMany,
    WhereDocument,
    SearchResult,
    maybe_cast_one_to_many,
)
from chromadb.api.collection_configuration import UpdateCollectionConfiguration
from chromadb.execution.expression.plan import Search

import logging

if TYPE_CHECKING:
    from chromadb.api.models.AttachedFunction import AttachedFunction

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from chromadb.api import ServerAPI  # noqa: F401


class Collection(CollectionCommon["ServerAPI"]):
    def count(self) -> int:
        """The total number of embeddings added to the database

        Returns:
            int: The total number of embeddings added to the database

        """
        return self._client._count(
            collection_id=self.id,
            tenant=self.tenant,
            database=self.database,
        )

    def add(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Add embeddings to the data store.
        Args:
            ids: The ids of the embeddings you wish to add
            embeddings: The embeddings to add. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.
            images: The images to associate with the embeddings. Optional.
            uris: The uris of the images to associate with the embeddings. Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either embeddings or documents
            ValueError: If the length of ids, embeddings, metadatas, or documents don't match
            ValueError: If you don't provide an embedding function and don't provide embeddings
            ValueError: If you provide both embeddings and documents
            ValueError: If you provide an id that already exists

        """

        add_request = self._validate_and_prepare_add_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        self._client._add(
            collection_id=self.id,
            ids=add_request["ids"],
            embeddings=add_request["embeddings"],
            metadatas=add_request["metadatas"],
            documents=add_request["documents"],
            uris=add_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    def get(
        self,
        ids: Optional[OneOrMany[ID]] = None,
        where: Optional[Where] = None,
        limit: Optional[int] = None,
        offset: Optional[int] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = ["metadatas", "documents"],
    ) -> GetResult:
        """Get embeddings and their associated data from the data store. If no ids or where filter is provided, returns
        all embeddings up to limit starting at offset.

        Args:
            ids: The ids of the embeddings to get. Optional.
            where: A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            limit: The number of documents to return. Optional.
            offset: The offset to start returning results from. Useful for paging results with limit. Optional.
            where_document: A WhereDocument type dict used to filter by the documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`. Ids are always included. Defaults to `["metadatas", "documents"]`. Optional.

        Returns:
            GetResult: A GetResult object containing the results.

        """
        get_request = self._validate_and_prepare_get_request(
            ids=ids,
            where=where,
            where_document=where_document,
            include=include,
        )

        get_results = self._client._get(
            collection_id=self.id,
            ids=get_request["ids"],
            where=get_request["where"],
            where_document=get_request["where_document"],
            include=get_request["include"],
            limit=limit,
            offset=offset,
            tenant=self.tenant,
            database=self.database,
        )
        return self._transform_get_response(
            response=get_results, include=get_request["include"]
        )

    def peek(self, limit: int = 10) -> GetResult:
        """Get the first few results in the database up to limit

        Args:
            limit: The number of results to return.

        Returns:
            GetResult: A GetResult object containing the results.
        """
        return self._transform_peek_response(
            self._client._peek(
                collection_id=self.id,
                n=limit,
                tenant=self.tenant,
                database=self.database,
            )
        )

    def query(
        self,
        query_embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        query_texts: Optional[OneOrMany[Document]] = None,
        query_images: Optional[OneOrMany[Image]] = None,
        query_uris: Optional[OneOrMany[URI]] = None,
        ids: Optional[OneOrMany[ID]] = None,
        n_results: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Include = [
            "metadatas",
            "documents",
            "distances",
        ],
    ) -> QueryResult:
        """Get the n_results nearest neighbor embeddings for provided query_embeddings or query_texts.

        Args:
            query_embeddings: The embeddings to get the closest neighbors of. Optional.
            query_texts: The document texts to get the closest neighbors of. Optional.
            query_images: The images to get the closest neighbors of. Optional.
            query_uris: The URIs to be used with data loader. Optional.
            ids: A subset of ids to search within. Optional.
            n_results: The number of neighbors to return for each query_embedding or query_texts. Optional.
            where: A Where type dict used to filter results by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            where_document: A WhereDocument type dict used to filter by the documents. E.g. `{"$contains": "hello"}`. Optional.
            include: A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`. Ids are always included. Defaults to `["metadatas", "documents", "distances"]`. Optional.

        Returns:
            QueryResult: A QueryResult object containing the results.

        Raises:
            ValueError: If you don't provide either query_embeddings, query_texts, or query_images
            ValueError: If you provide both query_embeddings and query_texts
            ValueError: If you provide both query_embeddings and query_images
            ValueError: If you provide both query_texts and query_images

        """

        query_request = self._validate_and_prepare_query_request(
            query_embeddings=query_embeddings,
            query_texts=query_texts,
            query_images=query_images,
            query_uris=query_uris,
            ids=ids,
            n_results=n_results,
            where=where,
            where_document=where_document,
            include=include,
        )

        query_results = self._client._query(
            collection_id=self.id,
            ids=query_request["ids"],
            query_embeddings=query_request["embeddings"],
            n_results=query_request["n_results"],
            where=query_request["where"],
            where_document=query_request["where_document"],
            include=query_request["include"],
            tenant=self.tenant,
            database=self.database,
        )

        return self._transform_query_response(
            response=query_results, include=query_request["include"]
        )

    def modify(
        self,
        name: Optional[str] = None,
        metadata: Optional[CollectionMetadata] = None,
        configuration: Optional[UpdateCollectionConfiguration] = None,
    ) -> None:
        """Modify the collection name or metadata

        Args:
            name: The updated name for the collection. Optional.
            metadata: The updated metadata for the collection. Optional.

        Returns:
            None
        """

        self._validate_modify_request(metadata)

        # Note there is a race condition here where the metadata can be updated
        # but another thread sees the cached local metadata.
        # TODO: fixme
        self._client._modify(
            id=self.id,
            new_name=name,
            new_metadata=metadata,
            new_configuration=configuration,
            tenant=self.tenant,
            database=self.database,
        )

        self._update_model_after_modify_success(name, metadata, configuration)

    def fork(
        self,
        new_name: str,
    ) -> "Collection":
        """Fork the current collection under a new name. The returned collection should contain identical data to the current collection.
        This is an experimental API that only works for Hosted Chroma for now.

        Args:
            new_name: The name of the new collection.

        Returns:
            Collection: A new collection with the specified name and containing identical data to the current collection.
        """
        model = self._client._fork(
            collection_id=self.id,
            new_name=new_name,
            tenant=self.tenant,
            database=self.database,
        )
        return Collection(
            client=self._client,
            model=model,
            embedding_function=self._embedding_function,
            data_loader=self._data_loader,
        )

    def search(
        self,
        searches: OneOrMany[Search],
    ) -> SearchResult:
        """Perform hybrid search on the collection.
        This is an experimental API that only works for Hosted Chroma for now.

        Args:
            searches: A single Search object or a list of Search objects, each containing:
                - where: Where expression for filtering
                - rank: Ranking expression for hybrid search (defaults to Val(0.0))
                - limit: Limit configuration for pagination (defaults to no limit)
                - select: Select configuration for keys to return (defaults to empty)

        Returns:
            SearchResult: Column-major format response with:
                - ids: List of result IDs for each search payload
                - documents: Optional documents for each payload
                - embeddings: Optional embeddings for each payload
                - metadatas: Optional metadata for each payload
                - scores: Optional scores for each payload
                - select: List of selected keys for each payload

        Raises:
            NotImplementedError: For local/segment API implementations

        Examples:
            # Using builder pattern with Key constants
            from chromadb.execution.expression import (
                Search, Key, K, Knn, Val
            )

            # Note: K is an alias for Key, so K.DOCUMENT == Key.DOCUMENT
            search = (Search()
                .where((K("category") == "science") & (K("score") > 0.5))
                .rank(Knn(query=[0.1, 0.2, 0.3]) * 0.8 + Val(0.5) * 0.2)
                .limit(10, offset=0)
                .select(K.DOCUMENT, K.SCORE, "title"))

            # Direct construction
            from chromadb.execution.expression import (
                Search, Eq, And, Gt, Knn, Limit, Select, Key
            )

            search = Search(
                where=And([Eq("category", "science"), Gt("score", 0.5)]),
                rank=Knn(query=[0.1, 0.2, 0.3]),
                limit=Limit(offset=0, limit=10),
                select=Select(keys={Key.DOCUMENT, Key.SCORE, "title"})
            )

            # Single search
            result = collection.search(search)

            # Multiple searches at once
            searches = [
                Search().where(K("type") == "article").rank(Knn(query=[0.1, 0.2])),
                Search().where(K("type") == "paper").rank(Knn(query=[0.3, 0.4]))
            ]
            results = collection.search(searches)
        """
        # Convert single search to list for consistent handling
        searches_list = maybe_cast_one_to_many(searches)
        if searches_list is None:
            searches_list = []

        # Embed any string queries in Knn objects
        embedded_searches = [
            self._embed_search_string_queries(search) for search in searches_list
        ]

        return self._client._search(
            collection_id=self.id,
            searches=cast(List[Search], embedded_searches),
            tenant=self.tenant,
            database=self.database,
        )

    def update(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids.

        Args:
            ids: The ids of the embeddings to update
            embeddings: The embeddings to update. If None, embeddings will be computed based on the documents or images using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.
            images: The images to associate with the embeddings. Optional.
        Returns:
            None
        """
        update_request = self._validate_and_prepare_update_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        self._client._update(
            collection_id=self.id,
            ids=update_request["ids"],
            embeddings=update_request["embeddings"],
            metadatas=update_request["metadatas"],
            documents=update_request["documents"],
            uris=update_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    def upsert(
        self,
        ids: OneOrMany[ID],
        embeddings: Optional[
            Union[
                OneOrMany[Embedding],
                OneOrMany[PyEmbedding],
            ]
        ] = None,
        metadatas: Optional[OneOrMany[Metadata]] = None,
        documents: Optional[OneOrMany[Document]] = None,
        images: Optional[OneOrMany[Image]] = None,
        uris: Optional[OneOrMany[URI]] = None,
    ) -> None:
        """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist.

        Args:
            ids: The ids of the embeddings to update
            embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional.
            metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
            documents: The documents to associate with the embeddings. Optional.

        Returns:
            None
        """
        upsert_request = self._validate_and_prepare_upsert_request(
            ids=ids,
            embeddings=embeddings,
            metadatas=metadatas,
            documents=documents,
            images=images,
            uris=uris,
        )

        self._client._upsert(
            collection_id=self.id,
            ids=upsert_request["ids"],
            embeddings=upsert_request["embeddings"],
            metadatas=upsert_request["metadatas"],
            documents=upsert_request["documents"],
            uris=upsert_request["uris"],
            tenant=self.tenant,
            database=self.database,
        )

    def delete(
        self,
        ids: Optional[IDs] = None,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
    ) -> None:
        """Delete the embeddings based on ids and/or a where filter

        Args:
            ids: The ids of the embeddings to delete
            where: A Where type dict used to filter the deletion by. E.g. `{"$and": [{"color" : "red"}, {"price": {"$gte": 4.20}}]}`. Optional.
            where_document: A WhereDocument type dict used to filter the deletion by the document content. E.g. `{"$contains": "hello"}`. Optional.

        Returns:
            None

        Raises:
            ValueError: If you don't provide either ids, where, or where_document
        """
        delete_request = self._validate_and_prepare_delete_request(
            ids, where, where_document
        )

        self._client._delete(
            collection_id=self.id,
            ids=delete_request["ids"],
            where=delete_request["where"],
            where_document=delete_request["where_document"],
            tenant=self.tenant,
            database=self.database,
        )

    def attach_function(
        self,
        function_id: str,
        name: str,
        output_collection: str,
        params: Optional[Dict[str, Any]] = None,
    ) -> "AttachedFunction":
        """Attach a function to this collection.

        Args:
            function_id: Built-in function identifier (e.g., "record_counter")
            name: Unique name for this attached function
            output_collection: Name of the collection where function output will be stored
            params: Optional dictionary with function-specific parameters

        Returns:
            AttachedFunction: Object representing the attached function

        Example:
            >>> attached_fn = collection.attach_function(
            ...     function_id="record_counter",
            ...     name="mycoll_stats_fn",
            ...     output_collection="mycoll_stats",
            ...     params={"threshold": 100}
            ... )
        """
        return self._client.attach_function(
            function_id=function_id,
            name=name,
            input_collection_id=self.id,
            output_collection=output_collection,
            params=params,
            tenant=self.tenant,
            database=self.database,
        )
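Not part of the diff: a minimal usage sketch for the synchronous Collection API in the file above, assuming chromadb.PersistentClient and its default embedding function. The storage path, collection name, documents, and filter values are illustrative.

# usage_sketch_sync.py (illustrative, not included in this commit)
import chromadb

# PersistentClient stores data on local disk; the path is an assumption here.
client = chromadb.PersistentClient(path="./chroma_data")
collection = client.get_or_create_collection(name="articles")

# upsert() creates records that don't exist yet and updates those that do.
collection.upsert(
    ids=["a1", "a2"],
    documents=["intro to vectors", "hybrid search overview"],
    metadatas=[
        {"category": "science", "score": 0.9},
        {"category": "science", "score": 0.4},
    ],
)

# query() with metadata and document filters, mirroring the docstring examples.
hits = collection.query(
    query_texts=["vector search"],
    n_results=2,
    where={"$and": [{"category": "science"}, {"score": {"$gte": 0.5}}]},
    where_document={"$contains": "vectors"},
)
print(hits["ids"])

# delete() requires at least one of ids, where, or where_document.
collection.delete(where={"score": {"$lt": 0.5}})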
File diff suppressed because it is too large
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.