chore: 添加虚拟环境到仓库

- 添加 backend_service/venv 虚拟环境
- 包含所有Python依赖包
- 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
2025-12-03 10:19:25 +08:00
parent a6c2027caa
commit c4f851d387
12655 changed files with 3009376 additions and 0 deletions

View File

@@ -0,0 +1,49 @@
import re
from typing import Tuple
from uuid import UUID
from chromadb.db.base import SqlDB
from chromadb.segment import SegmentManager, VectorReader
topic_regex = r"persistent:\/\/(?P<tenant>.+)\/(?P<namespace>.+)\/(?P<topic>.+)"
def parse_topic_name(topic_name: str) -> Tuple[str, str, str]:
"""Parse the topic name into the tenant, namespace and topic name"""
match = re.match(topic_regex, topic_name)
if not match:
raise ValueError(f"Invalid topic name: {topic_name}")
return match.group("tenant"), match.group("namespace"), match.group("topic")
def create_topic_name(tenant: str, namespace: str, collection_id: UUID) -> str:
return f"persistent://{tenant}/{namespace}/{str(collection_id)}"
def trigger_vector_segments_max_seq_id_migration(
db: SqlDB, segment_manager: SegmentManager
) -> None:
"""
Trigger the migration of vector segments' max_seq_id from the pickled metadata file to SQLite.
Vector segments migrate this field automatically on init—so this should be used when we know segments are likely unmigrated and unloaded.
This is a no-op if all vector segments have already migrated their max_seq_id.
"""
with db.tx() as cur:
cur.execute(
"""
SELECT collection
FROM "segments"
WHERE "id" NOT IN (SELECT "segment_id" FROM "max_seq_id") AND
"type" = 'urn:chroma:segment/vector/hnsw-local-persisted'
"""
)
collection_ids_with_unmigrated_segments = [row[0] for row in cur.fetchall()]
if len(collection_ids_with_unmigrated_segments) == 0:
return
for collection_id in collection_ids_with_unmigrated_segments:
# Loading the segment triggers the migration on init
segment_manager.get_segment(UUID(collection_id), VectorReader)