chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境 - 包含所有Python依赖包 - 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
@@ -0,0 +1,10 @@
|
||||
# Telemetry
|
||||
|
||||
This directory holds all the telemetry for Chroma.
|
||||
|
||||
- `product/` contains anonymized product telemetry which we, Chroma, collect so we can
|
||||
understand usage patterns. For more information, see https://docs.trychroma.com/telemetry.
|
||||
- `opentelemetry/` contains all of the config for Chroma's [OpenTelemetry](https://opentelemetry.io/docs/instrumentation/python/getting-started/)
|
||||
setup. These metrics are *not* sent back to Chroma -- anyone operating a Chroma instance
|
||||
can use the OpenTelemetry metrics and traces to understand how their instance of Chroma
|
||||
is behaving.
|
||||
Binary file not shown.
@@ -0,0 +1,187 @@
|
||||
import asyncio
|
||||
import os
|
||||
from functools import wraps
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, Optional, Sequence, Union, TypeVar
|
||||
|
||||
from opentelemetry import trace
|
||||
from opentelemetry.sdk.resources import SERVICE_NAME, Resource
|
||||
from opentelemetry.sdk.trace import TracerProvider
|
||||
from opentelemetry.sdk.trace.export import (
|
||||
BatchSpanProcessor,
|
||||
)
|
||||
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
||||
|
||||
from chromadb.config import Component
|
||||
from chromadb.config import System
|
||||
|
||||
|
||||
class OpenTelemetryGranularity(Enum):
    """The granularity of the OpenTelemetry spans."""

    NONE = "none"
    """No spans are emitted."""

    OPERATION = "operation"
    """Spans are emitted for each operation."""

    OPERATION_AND_SEGMENT = "operation_and_segment"
    """Spans are emitted for each operation and segment."""

    ALL = "all"
    """Spans are emitted for almost every method call."""

    # Granularities are ordered least-restrictive first:
    # "all" < "operation_and_segment" < "operation" < "none".
    def __lt__(self, other: Any) -> bool:
        """Return True when self is less restrictive than *other*."""
        ranking = (
            OpenTelemetryGranularity.ALL,
            OpenTelemetryGranularity.OPERATION_AND_SEGMENT,
            OpenTelemetryGranularity.OPERATION,
            OpenTelemetryGranularity.NONE,
        )
        return ranking.index(self) < ranking.index(other)
|
||||
|
||||
|
||||
class OpenTelemetryClient(Component):
    """Component that configures module-level OpenTelemetry state from settings."""

    def __init__(self, system: System):
        super().__init__(system)
        # Initialize the module-level tracer/granularity from the system's
        # settings. An unset granularity setting falls back to "none"
        # (tracing disabled).
        otel_init(
            system.settings.chroma_otel_service_name,
            system.settings.chroma_otel_collection_endpoint,
            system.settings.chroma_otel_collection_headers,
            OpenTelemetryGranularity(
                system.settings.chroma_otel_granularity
                if system.settings.chroma_otel_granularity
                else "none"
            ),
        )
|
||||
|
||||
|
||||
# Module-level tracing state, populated by otel_init(). `tracer` stays None
# (and `granularity` stays NONE) until otel_init() is called with a
# granularity other than NONE, which makes trace_method() a no-op.
tracer: Optional[trace.Tracer] = None
granularity: OpenTelemetryGranularity = OpenTelemetryGranularity("none")
|
||||
|
||||
|
||||
def otel_init(
    otel_service_name: Optional[str],
    otel_collection_endpoint: Optional[str],
    otel_collection_headers: Optional[Dict[str, str]],
    otel_granularity: OpenTelemetryGranularity,
) -> None:
    """Initializes module-level state for OpenTelemetry.

    Parameters match the environment variables which configure OTel as documented
    at https://docs.trychroma.com/deployment/observability.
    - otel_service_name: The name of the service for OTel tagging and aggregation.
    - otel_collection_endpoint: The endpoint to which OTel spans are sent
        (e.g. api.honeycomb.com).
    - otel_collection_headers: The headers to send with OTel spans
        (e.g. {"x-honeycomb-team": "abc123"}).
    - otel_granularity: The granularity of the spans to emit.
    """
    # Tracing disabled: leave `tracer` as None so decorated methods no-op.
    if otel_granularity == OpenTelemetryGranularity.NONE:
        return
    resource = Resource(attributes={SERVICE_NAME: str(otel_service_name)})
    provider = TracerProvider(resource=resource)
    # Spans are exported in batches over OTLP/gRPC to the configured endpoint.
    provider.add_span_processor(
        BatchSpanProcessor(
            # TODO: we may eventually want to make this configurable.
            OTLPSpanExporter(
                endpoint=str(otel_collection_endpoint),
                headers=otel_collection_headers,
            )
        )
    )
    trace.set_tracer_provider(provider)

    # Publish the tracer and granularity for trace_method() and
    # add_attributes_to_current_span() to consult.
    global tracer, granularity
    tracer = trace.get_tracer(__name__)
    granularity = otel_granularity
|
||||
|
||||
|
||||
# Type variable that preserves the decorated callable's signature through
# trace_method().
T = TypeVar("T", bound=Callable)  # type: ignore[type-arg]
|
||||
|
||||
|
||||
def trace_method(
    trace_name: str,
    trace_granularity: OpenTelemetryGranularity,
    attributes: Optional[
        Dict[
            str,
            Union[
                str,
                bool,
                float,
                int,
                Sequence[str],
                Sequence[bool],
                Sequence[float],
                Sequence[int],
            ],
        ]
    ] = None,
) -> Callable[[T], T]:
    """A decorator that traces a method.

    - trace_name: Name of the emitted span.
    - trace_granularity: The granularity this span belongs to; the span is
      skipped when it is less restrictive than the configured module-level
      granularity.
    - attributes: Optional static attributes attached to the span.
    """

    def decorator(f: T) -> T:
        # Coroutine functions need an async wrapper so the awaited call runs
        # inside the span's context.
        if asyncio.iscoroutinefunction(f):

            @wraps(f)
            async def async_wrapper(*args, **kwargs):  # type: ignore[no-untyped-def]
                global tracer, granularity
                # Skip tracing when this span is finer-grained than configured,
                # or when otel_init() never created a tracer.
                if trace_granularity < granularity:
                    return await f(*args, **kwargs)
                if not tracer:
                    return await f(*args, **kwargs)
                with tracer.start_as_current_span(trace_name, attributes=attributes):
                    # Tag the span with the pod name (HOSTNAME may be unset
                    # outside Kubernetes; None values are filtered downstream).
                    add_attributes_to_current_span(
                        {"pod_name": os.environ.get("HOSTNAME")}
                    )
                    return await f(*args, **kwargs)

            return async_wrapper  # type: ignore
        else:

            @wraps(f)
            def wrapper(*args, **kwargs):  # type: ignore[no-untyped-def]
                global tracer, granularity
                # Same gating as the async path, for plain callables.
                if trace_granularity < granularity:
                    return f(*args, **kwargs)
                if not tracer:
                    return f(*args, **kwargs)
                with tracer.start_as_current_span(trace_name, attributes=attributes):
                    add_attributes_to_current_span(
                        {"pod_name": os.environ.get("HOSTNAME")}
                    )
                    return f(*args, **kwargs)

            return wrapper  # type: ignore

    return decorator
|
||||
|
||||
|
||||
def add_attributes_to_current_span(
    attributes: Dict[
        str,
        Union[
            str,
            bool,
            float,
            int,
            Sequence[str],
            Sequence[bool],
            Sequence[float],
            Sequence[int],
            None,
        ],
    ]
) -> None:
    """Add attributes to the current span.

    No-op when tracing is disabled (granularity NONE or no tracer).
    Entries whose value is None are dropped before being set.
    """
    global tracer, granularity
    if granularity == OpenTelemetryGranularity.NONE:
        return
    if not tracer:
        return
    span = trace.get_current_span()
    span.set_attributes({k: v for k, v in attributes.items() if v is not None})
|
||||
Binary file not shown.
@@ -0,0 +1,10 @@
|
||||
from typing import List, Optional
|
||||
from fastapi import FastAPI
|
||||
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
||||
|
||||
|
||||
def instrument_fastapi(app: FastAPI, excluded_urls: Optional[List[str]] = None) -> None:
    """Instrument FastAPI to emit OpenTelemetry spans.

    - app: The FastAPI application to instrument.
    - excluded_urls: Optional URL patterns to exclude from tracing;
      FastAPIInstrumentor expects them as one comma-separated string.
    """
    FastAPIInstrumentor.instrument_app(
        app, excluded_urls=",".join(excluded_urls) if excluded_urls else None
    )
|
||||
@@ -0,0 +1,95 @@
|
||||
import binascii
|
||||
import collections
|
||||
|
||||
import grpc
|
||||
from opentelemetry.trace import StatusCode, SpanKind
|
||||
|
||||
|
||||
# The namedtuple mirrors the fields gRPC exposes on ClientCallDetails;
# also subclassing grpc.ClientCallDetails lets instances be passed back to
# continuation() in place of the original details.
class _ClientCallDetails(
    collections.namedtuple(
        "_ClientCallDetails", ("method", "timeout", "metadata", "credentials")
    ),
    grpc.ClientCallDetails,
):
    """Immutable carrier used to rebuild call details with extra metadata."""

    pass
|
||||
|
||||
|
||||
def _encode_span_id(span_id: int) -> str:
|
||||
return binascii.hexlify(span_id.to_bytes(8, "big")).decode()
|
||||
|
||||
|
||||
def _encode_trace_id(trace_id: int) -> str:
|
||||
return binascii.hexlify(trace_id.to_bytes(16, "big")).decode()
|
||||
|
||||
|
||||
# Using OtelInterceptor with gRPC:
|
||||
# 1. Instantiate the interceptor: interceptors = [OtelInterceptor()]
|
||||
# 2. Intercept the channel: channel = grpc.intercept_channel(channel, *interceptors)
|
||||
|
||||
|
||||
class OtelInterceptor(
    grpc.UnaryUnaryClientInterceptor,
    grpc.UnaryStreamClientInterceptor,
    grpc.StreamUnaryClientInterceptor,
    grpc.StreamStreamClientInterceptor,
):
    """Client-side gRPC interceptor that wraps each RPC in a CLIENT span and
    propagates the span identity to the server via custom metadata headers."""

    def _intercept_call(self, continuation, client_call_details, request_or_iterator):
        # Imported lazily so we pick up the tracer otel_init() may have set
        # after this module was first imported (and avoid an import cycle).
        from chromadb.telemetry.opentelemetry import tracer

        # Tracing disabled: pass the call through untouched.
        if tracer is None:
            return continuation(client_call_details, request_or_iterator)
        with tracer.start_as_current_span(
            f"RPC {client_call_details.method}", kind=SpanKind.CLIENT
        ) as span:
            # Prepare metadata for propagation
            metadata = (
                client_call_details.metadata[:] if client_call_details.metadata else []
            )
            # Attach this span's trace/span ids as custom headers so the
            # server can correlate its own spans with this client call.
            metadata.extend(
                [
                    (
                        "chroma-traceid",
                        _encode_trace_id(span.get_span_context().trace_id),
                    ),
                    ("chroma-spanid", _encode_span_id(span.get_span_context().span_id)),
                ]
            )
            # Update client call details with new metadata
            new_client_details = _ClientCallDetails(
                client_call_details.method,
                client_call_details.timeout,
                tuple(metadata),  # Ensure metadata is a tuple
                client_call_details.credentials,
            )
            try:
                result = continuation(new_client_details, request_or_iterator)
                # Set attributes based on the result
                if hasattr(result, "details") and result.details():
                    span.set_attribute("rpc.detail", result.details())
                span.set_attribute("rpc.status_code", result.code().name.lower())
                span.set_attribute("rpc.status_code_value", result.code().value[0])
                # Set span status based on gRPC call result
                if result.code() != grpc.StatusCode.OK:
                    span.set_status(StatusCode.ERROR, description=str(result.code()))
                return result
            except Exception as e:
                # Log exception details and re-raise
                span.set_attribute("rpc.error", str(e))
                span.set_status(StatusCode.ERROR, description=str(e))
                raise

    def intercept_unary_unary(self, continuation, client_call_details, request):
        return self._intercept_call(continuation, client_call_details, request)

    def intercept_unary_stream(self, continuation, client_call_details, request):
        return self._intercept_call(continuation, client_call_details, request)

    def intercept_stream_unary(
        self, continuation, client_call_details, request_iterator
    ):
        return self._intercept_call(continuation, client_call_details, request_iterator)

    def intercept_stream_stream(
        self, continuation, client_call_details, request_iterator
    ):
        return self._intercept_call(continuation, client_call_details, request_iterator)
|
||||
@@ -0,0 +1,97 @@
|
||||
from abc import abstractmethod
|
||||
import os
|
||||
from typing import ClassVar, Dict, Any
|
||||
import uuid
|
||||
import chromadb
|
||||
from chromadb.config import Component
|
||||
from pathlib import Path
|
||||
from enum import Enum
|
||||
|
||||
# Settings that are safe to include in anonymized product telemetry. Only the
# names listed here are ever read off the Settings object and reported.
TELEMETRY_WHITELISTED_SETTINGS = [
    "chroma_api_impl",
    "is_persistent",
    "chroma_server_ssl_enabled",
    "chroma_server_api_default_path",
]
|
||||
|
||||
|
||||
class ServerContext(Enum):
    """Which server (if any) the telemetry client is running inside; reported
    as the "server_context" property on every event."""

    NONE = "None"
    FASTAPI = "FastAPI"
|
||||
|
||||
|
||||
class ProductTelemetryEvent:
    """Base class for anonymized product telemetry events.

    Subclasses whose max_batch_size is greater than 1 must override batch_key
    and batch so that multiple events can be coalesced before being sent.
    """

    # Largest number of events that may be folded into one report.
    max_batch_size: ClassVar[int] = 1
    # How many raw events this instance already represents.
    batch_size: int

    def __init__(self, batch_size: int = 1):
        self.batch_size = batch_size

    @property
    def properties(self) -> Dict[str, Any]:
        """All instance attributes, reported verbatim as event properties."""
        return vars(self)

    @property
    def name(self) -> str:
        """The event's reported name: its concrete class name."""
        return type(self).__name__

    # A batch key is used to determine whether two events can be batched
    # together. If a TelemetryEvent's max_batch_size > 1, batch_key() and
    # batch() MUST be implemented. Otherwise they are ignored.
    @property
    def batch_key(self) -> str:
        return self.name

    def batch(self, other: "ProductTelemetryEvent") -> "ProductTelemetryEvent":
        raise NotImplementedError
|
||||
|
||||
|
||||
class ProductTelemetryClient(Component):
    """Abstract base component for reporting anonymized product telemetry."""

    # Where the stable anonymous user id is persisted between runs.
    USER_ID_PATH = str(Path.home() / ".cache" / "chroma" / "telemetry_user_id")
    # Fallback id used when the cache file cannot be read or written.
    UNKNOWN_USER_ID = "UNKNOWN"
    SERVER_CONTEXT: ServerContext = ServerContext.NONE
    # Lazily-populated in-memory cache for user_id.
    _curr_user_id = None

    @abstractmethod
    def capture(self, event: ProductTelemetryEvent) -> None:
        """Record *event*; implementations may batch, send, or drop it."""
        pass

    @property
    def context(self) -> Dict[str, Any]:
        """Common properties attached to every event (version, server context,
        hosted flag, and whitelisted settings). Recomputed on each access."""
        chroma_version = chromadb.__version__
        settings = chromadb.get_settings()
        telemetry_settings = {}
        # Only whitelisted, non-identifying settings are ever reported.
        for whitelisted in TELEMETRY_WHITELISTED_SETTINGS:
            telemetry_settings[whitelisted] = settings[whitelisted]

        hosted = self._system.settings.chroma_server_host == "api.trychroma.com"

        self._context = {
            "chroma_version": chroma_version,
            "server_context": self.SERVER_CONTEXT.value,
            "hosted": hosted,
            **telemetry_settings,
        }
        return self._context

    @property
    def user_id(self) -> str:
        """Stable anonymous user id, created on first use and cached on disk."""
        if self._curr_user_id:
            return self._curr_user_id

        # File access may fail due to permissions or other reasons. We don't want to
        # crash so we catch all exceptions.
        try:
            if not os.path.exists(self.USER_ID_PATH):
                os.makedirs(os.path.dirname(self.USER_ID_PATH), exist_ok=True)
                with open(self.USER_ID_PATH, "w") as f:
                    new_user_id = str(uuid.uuid4())
                    f.write(new_user_id)
                self._curr_user_id = new_user_id
            else:
                with open(self.USER_ID_PATH, "r") as f:
                    # NOTE(review): raw file contents are used verbatim — any
                    # trailing whitespace would become part of the id; confirm
                    # no writer ever appends a newline.
                    self._curr_user_id = f.read()
        except Exception:
            self._curr_user_id = self.UNKNOWN_USER_ID
        return self._curr_user_id
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,256 @@
|
||||
import os
|
||||
from typing import cast, ClassVar
|
||||
from chromadb.telemetry.product import ProductTelemetryEvent
|
||||
|
||||
|
||||
class ClientStartEvent(ProductTelemetryEvent):
    """Emitted once when a Chroma client is constructed."""

    def __init__(self) -> None:
        super().__init__()
        # Lazy import to avoid circular imports
        from chromadb import is_in_colab

        # Whether the client is running inside Google Colab.
        self.in_colab = is_in_colab()
|
||||
|
||||
|
||||
class ServerStartEvent(ProductTelemetryEvent):
    """Emitted once when the Chroma server starts."""

    # True when the server was launched via the Chroma CLI.
    is_cli: bool

    def __init__(self) -> None:
        super().__init__()
        # The CLI sets CHROMA_CLI=True in the environment before launching.
        self.is_cli = os.environ.get("CHROMA_CLI", "False") == "True"
|
||||
|
||||
|
||||
# TODO: Re-enable embedding function tracking in create_collection
|
||||
# TODO: Re-enable embedding function tracking in create_collection
class ClientCreateCollectionEvent(ProductTelemetryEvent):
    """Emitted when a client creates a collection."""

    collection_uuid: str
    # embedding_function: str

    def __init__(self, collection_uuid: str):  # , embedding_function: str):
        super().__init__()
        self.collection_uuid = collection_uuid

        # embedding_function_names = get_builtins()

        # self.embedding_function = (
        #     embedding_function
        #     if embedding_function in embedding_function_names
        #     else "custom"
        # )
|
||||
|
||||
|
||||
class CollectionAddEvent(ProductTelemetryEvent):
    """Telemetry for collection.add(); batchable per collection."""

    max_batch_size: ClassVar[int] = 3000
    batch_size: int
    collection_uuid: str
    # Number of records added.
    add_amount: int
    # Counts of records carrying documents / metadata / uris respectively.
    with_documents: int
    with_metadata: int
    with_uris: int

    def __init__(
        self,
        collection_uuid: str,
        add_amount: int,
        with_documents: int,
        with_metadata: int,
        with_uris: int,
        batch_size: int = 1,
    ):
        super().__init__()
        self.collection_uuid = collection_uuid
        self.add_amount = add_amount
        self.with_documents = with_documents
        self.with_metadata = with_metadata
        self.with_uris = with_uris
        self.batch_size = batch_size

    @property
    def batch_key(self) -> str:
        # Only events for the same collection and event type may be merged.
        return self.collection_uuid + self.name

    def batch(self, other: "ProductTelemetryEvent") -> "CollectionAddEvent":
        """Merge *other* into this event, summing every counter."""
        if not self.batch_key == other.batch_key:
            raise ValueError("Cannot batch events")
        other = cast(CollectionAddEvent, other)
        total_amount = self.add_amount + other.add_amount
        return CollectionAddEvent(
            collection_uuid=self.collection_uuid,
            add_amount=total_amount,
            with_documents=self.with_documents + other.with_documents,
            with_metadata=self.with_metadata + other.with_metadata,
            with_uris=self.with_uris + other.with_uris,
            batch_size=self.batch_size + other.batch_size,
        )
|
||||
|
||||
|
||||
class CollectionUpdateEvent(ProductTelemetryEvent):
    """Telemetry for collection.update(); batchable per collection."""

    max_batch_size: ClassVar[int] = 300
    batch_size: int
    collection_uuid: str
    # Number of records updated.
    update_amount: int
    # Counts of records carrying each optional field.
    with_embeddings: int
    with_metadata: int
    with_documents: int
    with_uris: int

    def __init__(
        self,
        collection_uuid: str,
        update_amount: int,
        with_embeddings: int,
        with_metadata: int,
        with_documents: int,
        with_uris: int,
        batch_size: int = 1,
    ):
        super().__init__()
        self.collection_uuid = collection_uuid
        self.update_amount = update_amount
        self.with_embeddings = with_embeddings
        self.with_metadata = with_metadata
        self.with_documents = with_documents
        self.with_uris = with_uris
        self.batch_size = batch_size

    @property
    def batch_key(self) -> str:
        # Only events for the same collection and event type may be merged.
        return self.collection_uuid + self.name

    def batch(self, other: "ProductTelemetryEvent") -> "CollectionUpdateEvent":
        """Merge *other* into this event, summing every counter."""
        if not self.batch_key == other.batch_key:
            raise ValueError("Cannot batch events")
        other = cast(CollectionUpdateEvent, other)
        total_amount = self.update_amount + other.update_amount
        return CollectionUpdateEvent(
            collection_uuid=self.collection_uuid,
            update_amount=total_amount,
            with_documents=self.with_documents + other.with_documents,
            with_metadata=self.with_metadata + other.with_metadata,
            with_embeddings=self.with_embeddings + other.with_embeddings,
            with_uris=self.with_uris + other.with_uris,
            batch_size=self.batch_size + other.batch_size,
        )
|
||||
|
||||
|
||||
class CollectionQueryEvent(ProductTelemetryEvent):
    """Telemetry for collection.query(); batchable per collection."""

    max_batch_size: ClassVar[int] = 3000
    batch_size: int
    collection_uuid: str
    # Number of query embeddings submitted.
    query_amount: int
    # Number of ids used to pre-filter the query.
    filtered_ids_amount: int
    # Counts of queries using a where / where_document filter.
    with_metadata_filter: int
    with_document_filter: int
    n_results: int
    # Counts of queries requesting each include option.
    include_metadatas: int
    include_documents: int
    include_uris: int
    include_distances: int

    def __init__(
        self,
        collection_uuid: str,
        query_amount: int,
        filtered_ids_amount: int,
        with_metadata_filter: int,
        with_document_filter: int,
        n_results: int,
        include_metadatas: int,
        include_documents: int,
        include_uris: int,
        include_distances: int,
        batch_size: int = 1,
    ):
        super().__init__()
        self.collection_uuid = collection_uuid
        self.query_amount = query_amount
        self.filtered_ids_amount = filtered_ids_amount
        self.with_metadata_filter = with_metadata_filter
        self.with_document_filter = with_document_filter
        self.n_results = n_results
        self.include_metadatas = include_metadatas
        self.include_documents = include_documents
        self.include_uris = include_uris
        self.include_distances = include_distances
        self.batch_size = batch_size

    @property
    def batch_key(self) -> str:
        # Only events for the same collection and event type may be merged.
        return self.collection_uuid + self.name

    def batch(self, other: "ProductTelemetryEvent") -> "CollectionQueryEvent":
        """Merge *other* into this event, summing every counter."""
        if not self.batch_key == other.batch_key:
            raise ValueError("Cannot batch events")
        other = cast(CollectionQueryEvent, other)
        total_amount = self.query_amount + other.query_amount
        return CollectionQueryEvent(
            collection_uuid=self.collection_uuid,
            query_amount=total_amount,
            filtered_ids_amount=self.filtered_ids_amount + other.filtered_ids_amount,
            with_metadata_filter=self.with_metadata_filter + other.with_metadata_filter,
            with_document_filter=self.with_document_filter + other.with_document_filter,
            n_results=self.n_results + other.n_results,
            include_metadatas=self.include_metadatas + other.include_metadatas,
            include_documents=self.include_documents + other.include_documents,
            include_uris=self.include_uris + other.include_uris,
            include_distances=self.include_distances + other.include_distances,
            batch_size=self.batch_size + other.batch_size,
        )
|
||||
|
||||
|
||||
class CollectionGetEvent(ProductTelemetryEvent):
    """Telemetry for collection.get(); batchable per collection and limit."""

    max_batch_size: ClassVar[int] = 300
    batch_size: int
    collection_uuid: str
    # Number of ids requested.
    ids_count: int
    limit: int
    # Counts of gets requesting each include option.
    include_metadata: int
    include_documents: int
    include_uris: int

    def __init__(
        self,
        collection_uuid: str,
        ids_count: int,
        limit: int,
        include_metadata: int,
        include_documents: int,
        include_uris: int,
        batch_size: int = 1,
    ):
        super().__init__()
        self.collection_uuid = collection_uuid
        self.ids_count = ids_count
        self.limit = limit
        self.include_metadata = include_metadata
        self.include_documents = include_documents
        self.include_uris = include_uris
        self.batch_size = batch_size

    @property
    def batch_key(self) -> str:
        # `limit` is part of the key so only gets with the same limit merge,
        # which is why batch() below keeps self.limit rather than summing it.
        return self.collection_uuid + self.name + str(self.limit)

    def batch(self, other: "ProductTelemetryEvent") -> "CollectionGetEvent":
        """Merge *other* into this event, summing counters (limit unchanged)."""
        if not self.batch_key == other.batch_key:
            raise ValueError("Cannot batch events")
        other = cast(CollectionGetEvent, other)
        total_amount = self.ids_count + other.ids_count
        return CollectionGetEvent(
            collection_uuid=self.collection_uuid,
            ids_count=total_amount,
            limit=self.limit,
            include_metadata=self.include_metadata + other.include_metadata,
            include_documents=self.include_documents + other.include_documents,
            include_uris=self.include_uris + other.include_uris,
            batch_size=self.batch_size + other.batch_size,
        )
|
||||
|
||||
|
||||
class CollectionDeleteEvent(ProductTelemetryEvent):
    """Telemetry for collection.delete(); not batched (max_batch_size is 1)."""

    collection_uuid: str
    # Number of records deleted.
    delete_amount: int

    def __init__(self, collection_uuid: str, delete_amount: int):
        super().__init__()
        self.collection_uuid = collection_uuid
        self.delete_amount = delete_amount
|
||||
@@ -0,0 +1,61 @@
|
||||
import posthog
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any, Dict, Set
|
||||
from chromadb.config import System
|
||||
from chromadb.telemetry.product import (
|
||||
ProductTelemetryClient,
|
||||
ProductTelemetryEvent,
|
||||
)
|
||||
from overrides import override
|
||||
|
||||
logger = logging.getLogger(__name__)

# Merged into every captured event; disables PostHog person-profile
# processing since all telemetry here is anonymized.
POSTHOG_EVENT_SETTINGS = {"$process_person_profile": False}
|
||||
|
||||
|
||||
class Posthog(ProductTelemetryClient):
    """ProductTelemetryClient that ships events to PostHog, coalescing
    batchable events before sending."""

    def __init__(self, system: System):
        # Telemetry is off when the user opted out, or under pytest.
        if not system.settings.anonymized_telemetry or "pytest" in sys.modules:
            posthog.disabled = True
        else:
            logger.info(
                "Anonymized telemetry enabled. See \
                    https://docs.trychroma.com/telemetry for more information."
            )

        posthog.project_api_key = "phc_YeUxaojbKk5KPi8hNlx1bBKHzuZ4FDtl67kH1blv8Bh"
        posthog_logger = logging.getLogger("posthog")
        # Silence posthog's logging
        posthog_logger.disabled = True

        # batch_key -> accumulated event awaiting flush.
        self.batched_events: Dict[str, ProductTelemetryEvent] = {}
        # Batch keys seen at least once; the first occurrence of each type is
        # sent immediately rather than batched.
        self.seen_event_types: Set[Any] = set()

        super().__init__(system)

    @override
    def capture(self, event: ProductTelemetryEvent) -> None:
        """Send *event* now, or fold it into its pending batch."""
        # Unbatchable events, and the first occurrence of each batchable
        # event type, go straight out.
        if event.max_batch_size == 1 or event.batch_key not in self.seen_event_types:
            self.seen_event_types.add(event.batch_key)
            self._direct_capture(event)
            return
        batch_key = event.batch_key
        if batch_key not in self.batched_events:
            self.batched_events[batch_key] = event
            return
        batched_event = self.batched_events[batch_key].batch(event)
        self.batched_events[batch_key] = batched_event
        # Flush once the accumulated batch is large enough.
        if batched_event.batch_size >= batched_event.max_batch_size:
            self._direct_capture(batched_event)
            del self.batched_events[batch_key]

    def _direct_capture(self, event: ProductTelemetryEvent) -> None:
        """Send one event to PostHog; failures are logged, never raised."""
        try:
            posthog.capture(
                self.user_id,
                event.name,
                {**event.properties, **POSTHOG_EVENT_SETTINGS, **self.context},
            )
        except Exception as e:
            logger.error(f"Failed to send telemetry event {event.name}: {e}")
|
||||
Reference in New Issue
Block a user