chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境 - 包含所有Python依赖包 - 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
@@ -0,0 +1,73 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
import uuid
|
||||
from chromadb.api import ClientAPI
|
||||
from chromadb.errors import ChromaError, UniqueConstraintError
|
||||
|
||||
|
||||
def test_duplicate_collection_create(
    client: ClientAPI,
) -> None:
    """Creating a collection whose name is already taken must raise.

    The client is reset, a collection named ``"test"`` is created, and a
    second ``create_collection`` call with the same name and metadata is
    expected to fail with either a ``UniqueConstraintError`` or an error
    whose text mentions "already exists".
    """
    client.reset()

    client.create_collection(
        name="test",
        metadata={"hnsw:construction_ef": 128, "hnsw:search_ef": 128, "hnsw:M": 128},
    )

    try:
        client.create_collection(
            name="test",
            metadata={
                "hnsw:construction_ef": 128,
                "hnsw:search_ef": 128,
                "hnsw:M": 128,
            },
        )
    except Exception as e:
        print("Collection creation failed as expected with error ", e)
        # Check the type first and fall back to the rendered message so an
        # exception with empty or non-string args cannot mask the real failure.
        assert isinstance(e, UniqueConstraintError) or "already exists" in str(e)
    else:
        # Raised outside the ``try`` so the broad ``except`` above cannot
        # swallow it and report a misleading "failed as expected" message.
        raise AssertionError("Expected exception")
|
||||
|
||||
|
||||
def test_not_existing_collection_delete(
    client: ClientAPI,
) -> None:
    """Deleting a collection that was never created must raise.

    The raised error's text is expected to mention "does not exist".
    """
    try:
        client.delete_collection(
            name="test101",
        )
    except Exception as e:
        print("Collection deletion failed as expected with error ", e)
        # ``str(e)`` instead of ``e.args[0]`` so an exception with empty or
        # non-string args yields a clean assertion failure.
        assert "does not exist" in str(e)
    else:
        # Raised outside the ``try`` so the broad ``except`` above cannot
        # swallow it and report a misleading "failed as expected" message.
        raise AssertionError("Expected exception")
|
||||
|
||||
|
||||
def test_multithreaded_get_or_create(client: ClientAPI) -> None:
    """Race ``get_or_create_collection`` against ``delete_collection``.

    Every worker gets-or-creates one shared, uniquely named collection; the
    even-numbered workers then try to delete it. A ``ChromaError`` whose
    message mentions "concurrent" (on create) or "does not exist" (on delete)
    is tolerated. Any other exception surfacing from a worker fails the test.
    """
    worker_count = 50
    shared_name = str(uuid.uuid4())

    def worker(i: int) -> None:
        try:
            created = client.get_or_create_collection(shared_name)
            assert created.name == shared_name
        except ChromaError as create_err:
            if "concurrent" not in create_err.message():
                raise create_err

        # Only every other worker attempts the delete.
        if i % 2 != 0:
            return

        try:
            client.delete_collection(shared_name)
        except ChromaError as delete_err:
            if "does not exist" not in delete_err.message():
                raise delete_err

    # Stress to trigger a potential race condition
    with ThreadPoolExecutor(max_workers=worker_count) as pool:
        pending = [pool.submit(worker, index) for index in range(worker_count)]

        for task in pending:
            try:
                task.result()
            except Exception as e:
                assert False, f"Thread raised an exception: {e}"
|
||||
@@ -0,0 +1,82 @@
|
||||
import pytest
|
||||
from chromadb.api.client import AdminClient, Client
|
||||
from chromadb.config import System
|
||||
from chromadb.db.impl.sqlite import SqliteDB
|
||||
from chromadb.errors import NotFoundError
|
||||
from chromadb.test.conftest import ClientFactories
|
||||
|
||||
|
||||
def test_deletes_database(client_factories: ClientFactories) -> None:
    """After a database is deleted, it and its collections are no longer found.

    Lookups of the database, of a collection inside it, and an upsert through
    a previously obtained collection handle are all expected to raise
    ``NotFoundError``.
    """
    database_name = "test_delete_database"

    # Start from a clean state.
    client_factories.create_client().reset()

    admin_client = client_factories.create_admin_client_from_system()
    admin_client.create_database(database_name)

    db_client = client_factories.create_client(database=database_name)
    stale_collection = db_client.create_collection("foo")

    admin_client.delete_database(database_name)

    with pytest.raises(NotFoundError):
        admin_client.get_database(database_name)

    with pytest.raises(NotFoundError):
        db_client.get_collection("foo")

    # The handle obtained before the delete must not be usable either.
    with pytest.raises(NotFoundError):
        stale_collection.upsert(["foo"], [0.0, 0.0, 0.0])
|
||||
|
||||
|
||||
def test_does_not_affect_other_databases(client_factories: ClientFactories) -> None:
    """Deleting one database leaves a same-named collection in another intact."""
    client_factories.create_client().reset()

    admin_client = client_factories.create_admin_client_from_system()
    for database_name in ("first", "second"):
        admin_client.create_database(database_name)

    first_client = client_factories.create_client(database="first")
    first_client.create_collection("test")

    second_client = client_factories.create_client(database="second")
    surviving_collection = second_client.create_collection("test")

    admin_client.delete_database("first")

    # The collection in "second" keeps its identity ...
    refetched = second_client.get_collection("test")
    assert refetched.id == surviving_collection.id

    # ... while the one in the deleted database is gone.
    with pytest.raises(NotFoundError):
        first_client.get_collection("test")
|
||||
|
||||
|
||||
def test_collection_was_removed(sqlite_persistent: System) -> None:
    """Deleting a database also removes its rows from the ``collections`` table."""
    database_name = "test_delete_database"
    sqlite = sqlite_persistent.instance(SqliteDB)

    admin_client = AdminClient.from_system(sqlite_persistent)
    admin_client.create_database(database_name)

    client = Client.from_system(sqlite_persistent, database=database_name)
    client.create_collection("foo")

    admin_client.delete_database(database_name)

    with pytest.raises(NotFoundError):
        client.get_collection("foo")

    # Check the table directly: no collection rows may remain.
    with sqlite.tx() as cur:
        count_row = cur.execute("SELECT COUNT(*) from collections").fetchone()
        assert count_row[0] == 0
|
||||
|
||||
|
||||
def test_errors_when_database_does_not_exist(client_factories: ClientFactories) -> None:
    """Deleting a database that was never created raises ``NotFoundError``."""
    client_factories.create_client().reset()

    admin_client = client_factories.create_admin_client_from_system()

    with pytest.raises(NotFoundError):
        admin_client.delete_database("foo")
|
||||
@@ -0,0 +1,8 @@
|
||||
import pytest
|
||||
from chromadb.errors import NotFoundError
|
||||
from chromadb.test.conftest import ClientFactories
|
||||
|
||||
|
||||
def test_get_database_not_found(client_factories: ClientFactories) -> None:
    """Building a client for an unknown database raises ``NotFoundError``."""
    missing_database = "does_not_exist"
    with pytest.raises(NotFoundError):
        client_factories.create_client(database=missing_database)
|
||||
@@ -0,0 +1,17 @@
|
||||
import numpy as np
|
||||
from chromadb.api import ClientAPI
|
||||
|
||||
|
||||
def test_invalid_update(client: ClientAPI) -> None:
    """An update for an unknown ID must not affect a later ``add`` of that ID.

    The update is issued first, then the record is added, and the stored
    embedding is compared against the values given to ``add``.
    """
    client.reset()
    collection = client.create_collection("test")

    # Update is invalid because ID does not exist
    collection.update(ids=["foo"], embeddings=[[0.0, 0.0, 0.0]])

    collection.add(ids=["foo"], embeddings=[[1.0, 1.0, 1.0]])

    fetched = collection.get(ids=["foo"], include=["embeddings"])
    stored_embeddings = fetched["embeddings"]

    # Embeddings should be the same as what was provided to .add()
    assert stored_embeddings is not None
    expected = np.array([1.0, 1.0, 1.0])
    assert np.allclose(fetched["embeddings"][0], expected)
|
||||
@@ -0,0 +1,66 @@
|
||||
import logging
|
||||
|
||||
import chromadb.test.property.strategies as strategies
|
||||
import hypothesis.strategies as st
|
||||
from chromadb.api import ClientAPI
|
||||
from chromadb.test.conftest import NOT_CLUSTER_ONLY, reset
|
||||
from chromadb.test.property import invariants
|
||||
from chromadb.test.utils.wait_for_version_increase import wait_for_version_increase
|
||||
from hypothesis import HealthCheck, given, settings
|
||||
|
||||
# Shared strategies: the "coll" and "recordset" keys let every use within a
# single test case draw the same collection and the same record set.
collection_st = st.shared(
    strategies.collections(add_filterable_data=True, with_hnsw_params=True),
    key="coll",
)
recordset_st = st.shared(
    strategies.recordsets(collection_st, max_size=1000),
    key="recordset",
)
|
||||
|
||||
|
||||
@settings(
    deadline=90000,
    suppress_health_check=[
        HealthCheck.function_scoped_fixture,
        HealthCheck.large_base_example,
        HealthCheck.filter_too_much,
    ],
)  # type: ignore
@given(
    collection=collection_st,
    record_set=recordset_st,
    limit=st.integers(min_value=1, max_value=10),
    offset=st.integers(min_value=0, max_value=10),
    should_compact=st.booleans(),
)
def test_get_limit_offset(
    caplog,
    client: ClientAPI,
    collection: strategies.Collection,
    record_set: dict,
    limit: int,
    offset: int,
    should_compact: bool,
) -> None:
    """``get(offset=, limit=)`` must return the matching slice of an unpaged ``get()``."""
    caplog.set_level(logging.ERROR)

    reset(client)
    coll = client.create_collection(
        name=collection.name,
        metadata=collection.metadata,  # type: ignore
        embedding_function=collection.embedding_function,
    )

    # Captured before the add so a later version bump can be detected.
    initial_version = coll.get_model()["version"]

    coll.add(**record_set)

    # Only wait for compaction when NOT_CLUSTER_ONLY is false, the example
    # asks for it, and the collection has some minimal size.
    wait_for_compaction = (
        not NOT_CLUSTER_ONLY
        and should_compact
        and len(invariants.wrap(record_set["ids"])) > 10
    )
    if wait_for_compaction:
        # Wait for the model to be updated
        wait_for_version_increase(client, collection.name, initial_version)

    page_ids = coll.get(offset=offset, limit=limit)["ids"]
    every_id = coll.get()["ids"]
    assert page_ids == every_id[offset : offset + limit]
|
||||
@@ -0,0 +1,89 @@
|
||||
from typing import Dict, List
|
||||
from hypothesis import given
|
||||
from chromadb.test.conftest import ClientFactories
|
||||
import hypothesis.strategies as st
|
||||
|
||||
|
||||
def test_list_databases(client_factories: ClientFactories) -> None:
    """``list_databases`` returns every created database plus the default one."""
    client_factories.create_client().reset()
    admin_client = client_factories.create_admin_client_from_system()

    created_names = [f"test_list_databases_{i}" for i in range(10)]
    for name in created_names:
        admin_client.create_database(name)

    databases = admin_client.list_databases()
    assert len(databases) == 11  # add 1 for the default_database

    for name in created_names:
        assert any(d["name"] == name for d in databases)

    assert any(d["name"] == "default_database" for d in databases)
|
||||
|
||||
|
||||
@st.composite
def tenants_and_databases_st(
    draw: st.DrawFn, max_tenants: int, max_databases: int
) -> Dict[str, List[str]]:
    """Draw a random assignment of databases to tenants.

    Between 1 and ``max_tenants`` tenants and between 0 and ``max_databases``
    databases are drawn, and each database is given to one randomly chosen
    tenant. The result maps tenant name (``tenant_<i>``) to the list of its
    database names (``database_<i>``), in database-index order. Tenants that
    receive no database do not appear in the mapping.
    """
    tenant_count = draw(st.integers(min_value=1, max_value=max_tenants))
    database_count = draw(st.integers(min_value=0, max_value=max_databases))

    # One owning tenant index per database, in database order.
    owner_indices = draw(
        st.lists(
            st.integers(min_value=0, max_value=tenant_count - 1),
            min_size=database_count,
            max_size=database_count,
        )
    )

    result: Dict[str, List[str]] = {}
    for database_i, tenant_i in enumerate(owner_indices):
        result.setdefault(f"tenant_{tenant_i}", []).append(f"database_{database_i}")

    return result
|
||||
|
||||
|
||||
@given(
    limit=st.integers(min_value=1, max_value=10),
    offset=st.integers(min_value=0, max_value=10),
    tenants_and_databases=tenants_and_databases_st(max_tenants=10, max_databases=10),
)
def test_list_databases_with_limit_offset(
    limit: int,
    offset: int,
    tenants_and_databases: Dict[str, List[str]],
    client_factories: ClientFactories,
) -> None:
    """Per-tenant ``list_databases(limit=, offset=)`` returns the expected page.

    For every tenant, the listed names must equal the ``offset``/``limit``
    slice of the databases created for that tenant, in creation order.
    """
    client_factories.create_client().reset()
    admin_client = client_factories.create_admin_client_from_system()

    for tenant, databases in tenants_and_databases.items():
        admin_client.create_tenant(tenant)
        for database in databases:
            admin_client.create_database(database, tenant)

    for tenant, all_databases in tenants_and_databases.items():
        listed_databases = admin_client.list_databases(
            limit=limit, offset=offset, tenant=tenant
        )
        expected_databases = all_databases[offset : offset + limit]

        # A page that runs past the end is short (possibly empty); otherwise
        # it holds exactly ``limit`` entries.
        if limit + offset > len(all_databases):
            expected_count = max(len(all_databases) - offset, 0)
        else:
            expected_count = limit

        assert len(listed_databases) == expected_count
        assert [d["name"] for d in listed_databases] == expected_databases
|
||||
@@ -0,0 +1,62 @@
|
||||
# Tests that various combinations of numpy and python lists work as expected as inputs
|
||||
# to add/query/update/upsert operations
|
||||
|
||||
from typing import Any, Dict, List
|
||||
import numpy as np
|
||||
from chromadb.api import ClientAPI
|
||||
from chromadb.api.models.Collection import Collection
|
||||
from chromadb.test.conftest import reset
|
||||
|
||||
|
||||
def add_and_validate(
    collection: Collection,
    ids: List[str],
    embeddings: Any,
    metadatas: List[Dict[str, Any]],
    documents: List[str],
) -> None:
    """Add the records to ``collection`` and assert a full ``get`` returns them unchanged."""
    collection.add(  # type: ignore
        ids=ids,
        embeddings=embeddings,
        metadatas=metadatas,
        documents=documents,
    )

    results = collection.get(include=["metadatas", "documents", "embeddings"])  # type: ignore

    assert results["ids"] == ids
    assert results["metadatas"] == metadatas
    assert results["documents"] == documents
    # Using integers instead of floats to avoid floating point comparison issues
    assert np.array_equal(results["embeddings"], embeddings)  # type: ignore
|
||||
|
||||
|
||||
def test_py_list_of_numpy(client: ClientAPI) -> None:
    """Embeddings given as a Python list of numpy arrays round-trip through add/get."""
    reset(client)
    coll = client.create_collection("test")

    ids = ["1", "2", "3"]
    # List of numpy arrays
    embeddings = [np.array([1, 2, 3]), np.array([1, 2, 3]), np.array([1, 2, 3])]
    metadatas = [{"a": 1}, {"a": 2}, {"a": 3}]
    documents = ["a", "b", "c"]

    add_and_validate(coll, ids, embeddings, metadatas, documents)
|
||||
|
||||
|
||||
def test_py_list_of_py(client: ClientAPI) -> None:
    """Embeddings given as a Python list of Python lists round-trip through add/get."""
    reset(client)
    coll = client.create_collection("test")

    ids = ["4", "5", "6"]
    # List of python lists
    embeddings = [[1, 2, 3], [1, 2, 3], [1, 2, 3]]
    metadatas = [{"a": 4}, {"a": 5}, {"a": 6}]
    documents = ["d", "e", "f"]

    add_and_validate(coll, ids, embeddings, metadatas, documents)
|
||||
|
||||
|
||||
def test_numpy(client: ClientAPI) -> None:
    """Embeddings given as a single 2-D numpy array round-trip through add/get."""
    reset(client)
    coll = client.create_collection("test")

    ids = ["7", "8", "9"]
    # Numpy array
    embeddings = np.array([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
    metadatas = [{"a": 7}, {"a": 8}, {"a": 9}]
    documents = ["g", "h", "i"]

    add_and_validate(coll, ids, embeddings, metadatas, documents)
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,105 @@
|
||||
import pytest
|
||||
from typing import List, cast, Dict, Any
|
||||
from chromadb.api.types import Documents, Image, Document, Embeddings
|
||||
from chromadb.utils.embedding_functions import (
|
||||
EmbeddingFunction,
|
||||
register_embedding_function,
|
||||
)
|
||||
import numpy as np
|
||||
|
||||
|
||||
def random_embeddings() -> Embeddings:
    """Return ten random embeddings, one per row of a 10x10 random matrix."""
    matrix = np.random.random(size=(10, 10))
    # Iterating the 2-D array yields its rows as 1-D arrays.
    return cast(Embeddings, list(matrix))
|
||||
|
||||
|
||||
def random_image() -> Image:
    """Return a random 10x10x3 ``int64`` array drawn with ``randint(0, 255)``."""
    shape = (10, 10, 3)
    return np.random.randint(0, 255, size=shape, dtype=np.int64)
|
||||
|
||||
|
||||
def random_documents() -> List[Document]:
    """Return ten documents, each the string form of a fresh random image."""
    documents: List[Document] = []
    for _ in range(10):
        documents.append(str(random_image()))
    return documents
|
||||
|
||||
|
||||
def test_embedding_function_results_format_when_response_is_valid() -> None:
    """A registered embedding function returning valid embeddings hands them back intact."""
    valid_embeddings = random_embeddings()

    @register_embedding_function
    class TestEmbeddingFunction(EmbeddingFunction[Documents]):
        # Minimal embedding function that always returns ``valid_embeddings``.

        def __init__(self) -> None:
            pass

        def __call__(self, input: Documents) -> Embeddings:
            return valid_embeddings

        @staticmethod
        def name() -> str:
            return "test"

        @staticmethod
        def build_from_config(config: Dict[str, Any]) -> "EmbeddingFunction[Documents]":
            return TestEmbeddingFunction()

        def get_config(self) -> Dict[str, Any]:
            return {}

        @staticmethod
        def validate_config(config: Dict[str, Any]) -> None:
            pass

        def validate_config_update(
            self, old_config: Dict[str, Any], new_config: Dict[str, Any]
        ) -> None:
            pass

    ef = TestEmbeddingFunction()
    produced = ef(random_documents())

    # Each returned embedding must match the one the function was given.
    for position, embedding in enumerate(produced):
        assert np.array_equal(embedding, valid_embeddings[position])
|
||||
|
||||
|
||||
def test_embedding_function_results_format_when_response_is_invalid() -> None:
    """An embedding function returning a non-embedding value leads to ``ValueError``."""
    invalid_embedding = {"error": "test"}

    @register_embedding_function
    class TestEmbeddingFunction(EmbeddingFunction[Documents]):
        # Minimal embedding function that always returns ``invalid_embedding``.

        def __init__(self) -> None:
            pass

        def __call__(self, input: Documents) -> Embeddings:
            # Return something that's not a valid Embeddings type
            return cast(Embeddings, invalid_embedding)

        @staticmethod
        def name() -> str:
            return "test"

        @staticmethod
        def build_from_config(config: Dict[str, Any]) -> "EmbeddingFunction[Documents]":
            return TestEmbeddingFunction()

        def get_config(self) -> Dict[str, Any]:
            return {}

        @staticmethod
        def validate_config(config: Dict[str, Any]) -> None:
            pass

        def validate_config_update(
            self, old_config: Dict[str, Any], new_config: Dict[str, Any]
        ) -> None:
            pass

    ef = TestEmbeddingFunction()

    # Either the call itself or the explicit normalization below is expected
    # to raise ValueError; both steps run inside the same ``pytest.raises``.
    with pytest.raises(ValueError):
        result = ef.__call__(random_documents())

        # Explicit normalization, in case the call above returned the raw
        # value without raising.
        from chromadb.api.types import normalize_embeddings

        normalize_embeddings(result)
|
||||
Reference in New Issue
Block a user