chore: 添加虚拟环境到仓库

- 添加 backend_service/venv 虚拟环境
- 包含所有Python依赖包
- 注意：虚拟环境约393MB，包含12655个文件
2025-12-03 10:19:25 +08:00
parent a6c2027caa
commit c4f851d387
12655 changed files with 3009376 additions and 0 deletions
--- a/backend_service/venv/lib/python3.13/site-packages/chromadb/ingest/impl/utils.py
+++ b/backend_service/venv/lib/python3.13/site-packages/chromadb/ingest/impl/utils.py
@@ -0,0 +1,49 @@
+import re
+from typing import Tuple
+from uuid import UUID
+
+from chromadb.db.base import SqlDB
+from chromadb.segment import SegmentManager, VectorReader
+
+# Pattern for topic names of the form ``persistent://<tenant>/<namespace>/<topic>``,
+# exposing the three parts as the named groups "tenant", "namespace" and "topic".
+# NOTE(review): each group is a greedy ``.+`` that may itself contain "/", so a
+# name with extra slashes still matches, with the surplus absorbed by ``tenant``.
+topic_regex = r"persistent:\/\/(?P<tenant>.+)\/(?P<namespace>.+)\/(?P<topic>.+)"
+
+
+def parse_topic_name(topic_name: str) -> Tuple[str, str, str]:
+    """Split a topic name into its tenant, namespace and topic components.
+
+    Args:
+        topic_name: Full topic name; it is matched against ``topic_regex``
+            from the start of the string.
+
+    Returns:
+        A ``(tenant, namespace, topic)`` tuple taken from the pattern's
+        named groups.
+
+    Raises:
+        ValueError: If ``topic_name`` does not match ``topic_regex``.
+    """
+    parsed = re.match(topic_regex, topic_name)
+    if parsed:
+        # Asking for several groups at once yields them as a single tuple.
+        tenant, namespace, topic = parsed.group("tenant", "namespace", "topic")
+        return tenant, namespace, topic
+    raise ValueError(f"Invalid topic name: {topic_name}")
+
+
+def create_topic_name(tenant: str, namespace: str, collection_id: UUID) -> str:
+    """Build the ``persistent://`` topic name for a collection.
+
+    Args:
+        tenant: Tenant path component.
+        namespace: Namespace path component.
+        collection_id: Collection UUID; its string form is the final component.
+
+    Returns:
+        The string ``persistent://<tenant>/<namespace>/<collection_id>``.
+    """
+    collection_part = str(collection_id)
+    return f"persistent://{tenant}/{namespace}/{collection_part}"
+
+
+def trigger_vector_segments_max_seq_id_migration(
+    db: SqlDB, segment_manager: SegmentManager
+) -> None:
+    """
+    Trigger the migration of vector segments' max_seq_id from the pickled metadata file to SQLite.
+
+    Vector segments migrate this field automatically on init, so this helper is meant for
+    situations where segments are likely both unmigrated and not yet loaded.
+
+    The "segments" table is queried for persisted local HNSW vector segments that have no
+    row in "max_seq_id"; the vector reader of each owning collection is then requested
+    from ``segment_manager``.
+
+    This is a no-op if all vector segments have already migrated their max_seq_id.
+    """
+    # Collections owning a persisted local HNSW segment with no "max_seq_id" row yet.
+    pending_segments_sql = """
+            SELECT collection
+            FROM "segments"
+            WHERE "id" NOT IN (SELECT "segment_id" FROM "max_seq_id") AND
+                  "type" = 'urn:chroma:segment/vector/hnsw-local-persisted'
+        """
+
+    with db.tx() as cur:
+        cur.execute(pending_segments_sql)
+        pending_collection_ids = [row[0] for row in cur.fetchall()]
+
+    # With nothing pending the loop body never runs, which makes this a no-op.
+    for raw_collection_id in pending_collection_ids:
+        # Loading the segment triggers the migration on init
+        segment_manager.get_segment(UUID(raw_collection_id), VectorReader)