chore: 添加虚拟环境到仓库

- 添加 backend_service/venv 虚拟环境
- 包含所有Python依赖包
- 注意：虚拟环境约393MB，包含12655个文件
2025-12-03 10:19:25 +08:00
parent a6c2027caa
commit c4f851d387
12655 changed files with 3009376 additions and 0 deletions
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__init__.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+"""The token module in agentscope"""
+
+from ._token_base import TokenCounterBase
+from ._gemini_token_counter import GeminiTokenCounter
+from ._openai_token_counter import OpenAITokenCounter
+from ._anthropic_token_counter import AnthropicTokenCounter
+from ._huggingface_token_counter import HuggingFaceTokenCounter
+
+# Names exported on a wildcard import of this package: the base class and
+# the four provider-specific token counters imported above.
+__all__ = [
+    "TokenCounterBase",
+    "GeminiTokenCounter",
+    "OpenAITokenCounter",
+    "AnthropicTokenCounter",
+    "HuggingFaceTokenCounter",
+]
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/__init__.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/__init__.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_anthropic_token_counter.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_anthropic_token_counter.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_gemini_token_counter.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_gemini_token_counter.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_huggingface_token_counter.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_huggingface_token_counter.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_openai_token_counter.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_openai_token_counter.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_token_base.cpython-313.pyc
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/__pycache__/_token_base.cpython-313.pyc
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_anthropic_token_counter.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_anthropic_token_counter.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+"""The Anthropic token counter class."""
+from typing import Any
+
+
+class AnthropicTokenCounter:
+    """Token counter backed by the Anthropic token counting API."""
+
+    def __init__(self, model_name: str, api_key: str, **kwargs: Any) -> None:
+        """Initialize the Anthropic token counter.
+
+        Args:
+            model_name (`str`):
+                The name of the Anthropic model to use, e.g. "claude-2".
+            api_key (`str`):
+                The API key for Anthropic.
+            **kwargs (`Any`):
+                Additional keyword arguments that will be passed to the
+                `anthropic.AsyncAnthropic` client.
+        """
+        # Function-scope import: `anthropic` is only required once a
+        # counter is actually instantiated.
+        import anthropic
+
+        self.client = anthropic.AsyncAnthropic(api_key=api_key, **kwargs)
+        self.model_name = model_name
+
+    async def count(
+        self,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        **kwargs: Any,
+    ) -> int:
+        """Count the number of tokens for the given messages.
+
+        .. note:: The Anthropic token counting API requires the multimodal
+         data to be in base64 format.
+
+        Args:
+            messages (`list[dict]`):
+                A list of dictionaries, where `role` and `content` fields are
+                required. A leading message with the `system` role is sent
+                through the dedicated `system` argument of the API. The list
+                passed in is not modified.
+            tools (`list[dict] | None`, defaults to `None`):
+                The tools JSON schemas that the model can use.
+            **kwargs (`Any`):
+                Additional keyword arguments for the token counting API.
+
+        Returns:
+            `int`:
+                The `input_tokens` value reported by the API.
+        """
+        # Work on a shallow copy so that removing the system message never
+        # mutates the caller's list (repeated calls would otherwise lose it).
+        remaining = list(messages)
+
+        system_message = None
+        # The emptiness guard avoids an IndexError on an empty message list.
+        if remaining and remaining[0].get("role") == "system":
+            system_message = remaining.pop(0)
+
+        request_kwargs: dict = {
+            "model": self.model_name,
+            "messages": remaining,
+            **kwargs,
+        }
+
+        if tools:
+            request_kwargs["tools"] = tools
+
+        if system_message:
+            # The `system` argument takes the prompt itself (a string or a
+            # list of text blocks), not the whole role/content message dict.
+            system_prompt = system_message.get("content")
+            if system_prompt:
+                request_kwargs["system"] = system_prompt
+
+        res = await self.client.messages.count_tokens(**request_kwargs)
+
+        return res.input_tokens
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_gemini_token_counter.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_gemini_token_counter.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+"""The gemini token counter class in agentscope."""
+from typing import Any
+
+from agentscope.token._token_base import TokenCounterBase
+
+
+class GeminiTokenCounter(TokenCounterBase):
+    """The Gemini token counter class."""
+
+    def __init__(self, model_name: str, api_key: str, **kwargs: Any) -> None:
+        """Initialize the Gemini token counter.
+
+        Args:
+            model_name (`str`):
+                The name of the Gemini model to use, e.g. "gemini-2.5-flash".
+            api_key (`str`):
+                The API key for Google Gemini.
+            **kwargs:
+                Additional keyword arguments that will be passed to the
+                Gemini client.
+        """
+        # Function-scope import: `google.genai` is only required once a
+        # counter is actually instantiated.
+        from google import genai
+
+        self.client = genai.Client(
+            api_key=api_key,
+            **kwargs,
+        )
+        self.model_name = model_name
+
+    async def count(
+        self,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        **config_kwargs: Any,
+    ) -> int:
+        """Count the number of tokens of gemini models.
+
+        Args:
+            messages (`list[dict]`):
+                The messages, forwarded as the `contents` argument of the
+                token counting request.
+            tools (`list[dict] | None`, defaults to `None`):
+                The tools, forwarded under the `tools` key of the request
+                config.
+            **config_kwargs (`Any`):
+                Additional entries merged into the request config; they
+                override `tools` when the same key is given.
+
+        Returns:
+            `int`:
+                The `total_tokens` value reported by the API.
+        """
+
+        request_kwargs = {
+            "model": self.model_name,
+            "contents": messages,
+            "config": {
+                "tools": tools,
+                **config_kwargs,
+            },
+        }
+
+        # Use the SDK's asynchronous surface (`client.aio`) so that the
+        # network round trip is awaited instead of blocking the event loop
+        # inside this coroutine.
+        res = await self.client.aio.models.count_tokens(**request_kwargs)
+
+        return res.total_tokens
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_huggingface_token_counter.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_huggingface_token_counter.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+"""The huggingface token counter class."""
+import os
+from typing import Any
+
+from agentscope.token._token_base import TokenCounterBase
+
+
+class HuggingFaceTokenCounter(TokenCounterBase):
+    """Token counter that relies on a HuggingFace tokenizer's chat template."""
+
+    def __init__(
+        self,
+        pretrained_model_name_or_path: str,
+        use_mirror: bool = False,
+        use_fast: bool = False,
+        trust_remote_code: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        """Load the tokenizer that will be used for counting.
+
+        Args:
+            pretrained_model_name_or_path (`str`):
+                Name or path of the pretrained model whose tokenizer is
+                loaded through `AutoTokenizer.from_pretrained`.
+            use_mirror (`bool`, defaults to `False`):
+                When true, the `HF_ENDPOINT` environment variable is set to
+                the HuggingFace mirror (useful for users in China). This
+                changes the environment of the whole process.
+            use_fast (`bool`, defaults to `False`):
+                Forwarded to the tokenizer loader.
+            trust_remote_code (`bool`, defaults to `False`):
+                Forwarded to the tokenizer loader.
+            **kwargs:
+                Any further keyword arguments for the tokenizer loader.
+
+        Raises:
+            `ValueError`:
+                If the loaded tokenizer has no chat template.
+        """
+        # Export the mirror endpoint before `transformers` is imported below.
+        if use_mirror:
+            os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
+
+        from transformers import AutoTokenizer
+
+        loaded = AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path,
+            trust_remote_code=trust_remote_code,
+            use_fast=use_fast,
+            **kwargs,
+        )
+        self.tokenizer = loaded
+
+        # Counting goes through the chat template, so it has to exist.
+        if loaded.chat_template is not None:
+            return
+
+        raise ValueError(
+            f"The tokenizer for model {pretrained_model_name_or_path} in "
+            f"transformers does not have chat template.",
+        )
+
+    async def count(
+        self,
+        messages: list[dict],
+        tools: list[dict] | None = None,
+        **kwargs: Any,
+    ) -> int:
+        """Count the tokens produced by applying the chat template.
+
+        Args:
+            messages (`list[dict]`):
+                A list of message dictionaries.
+            tools (`list[dict] | None`, defaults to `None`):
+                The JSON schema of the tools, which is passed to the chat
+                template and therefore included in the count.
+            **kwargs (`Any`):
+                Additional keyword arguments for `apply_chat_template`,
+                e.g. `chat_template`, `padding`, etc.
+
+        Returns:
+            `int`:
+                The length of the first row of the tokenized output.
+        """
+        template_options = {
+            "add_generation_prompt": False,
+            "tokenize": True,
+            "return_tensors": "np",
+            "tools": tools,
+        }
+        encoded = self.tokenizer.apply_chat_template(
+            messages,
+            **template_options,
+            **kwargs,
+        )
+
+        # Only the first row of the returned array is measured.
+        return len(encoded[0])
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_openai_token_counter.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_openai_token_counter.py
@@ -0,0 +1,384 @@
+# -*- coding: utf-8 -*-
+"""The OpenAI token counting class. The token calculation of vision models
+follows
+https://platform.openai.com/docs/guides/images-vision?api-mode=chat#calculating-costs
+"""
+import base64
+import io
+import json
+import math
+from http import HTTPStatus
+from typing import Any
+
+import requests
+
+from ._token_base import TokenCounterBase
+
+
+def _calculate_tokens_for_high_quality_image(
+    base_tokens: int,
+    tile_tokens: int,
+    width: int,
+    height: int,
+) -> int:
+    """Return the token cost of an image processed in high detail, following
+    https://platform.openai.com/docs/guides/images-vision?api-mode=chat#calculating-costs
+
+    Args:
+        base_tokens (`int`):
+            The fixed token cost added once per image.
+        tile_tokens (`int`):
+            The token cost of each 512px tile.
+        width (`int`):
+            The image width in pixels.
+        height (`int`):
+            The image height in pixels.
+
+    Returns:
+        `int`:
+            `base_tokens` plus `tile_tokens` for every tile of the rescaled
+            image.
+    """
+    bounding_box = 2048
+    target_short_side = 768
+    tile_size = 512
+
+    # Shrink the image until it fits inside the bounding box.
+    if max(width, height) > bounding_box:
+        fit_ratio = min(bounding_box / width, bounding_box / height)
+        width, height = int(width * fit_ratio), int(height * fit_ratio)
+
+    # Rescale (up or down) so that the shorter side equals the target.
+    short_side = min(width, height)
+    if short_side != target_short_side:
+        side_ratio = target_short_side / short_side
+        width, height = int(width * side_ratio), int(height * side_ratio)
+
+    # Number of tiles per axis, rounded up.
+    tiles_across = (width + tile_size - 1) // tile_size
+    tiles_down = (height + tile_size - 1) // tile_size
+
+    return tiles_across * tiles_down * tile_tokens + base_tokens
+
+
+def _get_size_of_image_url(
+    url: str,
+    timeout: float = 10.0,
+) -> tuple[int, int]:
+    """Get the size of an image from the given URL.
+
+    Args:
+        url (`str`):
+            A web URL or base64 encoded image URL.
+        timeout (`float`, defaults to `10.0`):
+            The timeout in seconds applied to each HTTP attempt when `url`
+            is a web URL. Without it a stalled server would block the
+            caller indefinitely.
+
+    Returns:
+        `tuple[int, int]`:
+            A tuple containing the width and height of the image.
+
+    Raises:
+        `requests.HTTPError`:
+            If the last download attempt returned an error status.
+    """
+    if url.startswith("data:image/"):
+        # Data URL: everything after the "base64," marker is the payload.
+        base64_data = url.split("base64,")[1]
+        image_data = base64.b64decode(base64_data)
+
+    else:
+        response = None
+        # Up to three attempts; stop at the first 200 response.
+        for _ in range(3):
+            response = requests.get(url, timeout=timeout)
+            if response.status_code == HTTPStatus.OK:
+                break
+        response.raise_for_status()
+        image_data = response.content
+
+    # Function-scope import: Pillow is only needed for image inputs.
+    from PIL import Image
+
+    # The context manager releases the image resources once the size is read.
+    with Image.open(io.BytesIO(image_data)) as image:
+        width, height = image.size
+
+    return width, height
+
+
+def _get_base_and_tile_tokens(model_name: str) -> tuple[int, int]:
+    """Get the base and tile tokens for the given OpenAI model.
+
+    Args:
+        model_name (`str`):
+            The name of the model.
+
+    Returns:
+        `tuple[int, int]`:
+            A tuple containing the base tokens and tile tokens.
+
+    Raises:
+        `ValueError`:
+            If the model name matches none of the supported prefixes.
+    """
+    # The mini variant has to be tested before the broader "gpt-4o" prefix,
+    # otherwise "gpt-4o-mini" would be priced like "gpt-4o". The bare
+    # "4o-mini" spelling is kept for backward compatibility.
+    if model_name.startswith(("gpt-4o-mini", "4o-mini")):
+        return 2833, 5667
+
+    if model_name.startswith(("gpt-4o", "gpt-4.1", "gpt-4.5")):
+        return 85, 170
+
+    # The "o1" prefix also covers "o1-pro".
+    if model_name.startswith(("o1", "o3")):
+        return 75, 150
+
+    raise ValueError(
+        f"Unsupported OpenAI model {model_name} for token counting. ",
+    )
+
+
+def _calculate_tokens_for_tools(
+    model_name: str,
+    tools: list[dict],
+    encoding: Any,
+) -> int:
+    """Calculate the tokens for the given tools JSON schema, which follows the
+    OpenAI cookbook
+    https://github.com/openai/openai-cookbook/blob/6dfb7920b59a45291f7df4ea41338d1faf9ef1e8/examples/How_to_count_tokens_with_tiktoken.ipynb
+
+    Args:
+        model_name (`str`):
+            The name of the model; names starting with "gpt-4o" use a
+            smaller per-function overhead.
+        tools (`list[dict]`):
+            The tools JSON schemas, each holding a `function` entry with a
+            `name` and optionally `description` and `parameters`.
+        encoding (`Any`):
+            The encoding object, whose `encode` method turns a string into
+            a sequence of tokens.
+
+    Returns:
+        `int`:
+            The estimated number of tokens, `0` when no tools are given.
+    """
+    if not tools:
+        return 0
+
+    # Per-element overheads of the cookbook heuristic.
+    func_init = 7 if model_name.startswith("gpt-4o") else 10
+    prop_init = 3
+    prop_key = 3
+    enum_init = -3
+    enum_item = 3
+    func_end = 12
+
+    func_token_count = 0
+    for tool in tools:
+        func_token_count += func_init
+        function = tool["function"]
+        f_name = function["name"]
+        # A missing or null description counts as empty.
+        f_desc = (function.get("description") or "").removesuffix(".")
+        func_token_count += len(encoding.encode(f"{f_name}:{f_desc}"))
+
+        # Tools without parameters (or without properties) are valid, so a
+        # missing entry is treated as "no properties" instead of failing.
+        properties = (function.get("parameters") or {}).get("properties") or {}
+        if not properties:
+            continue
+
+        func_token_count += prop_init
+        for p_name, schema in properties.items():
+            func_token_count += prop_key
+            # Properties described through e.g. `anyOf` carry no `type`.
+            p_type = schema.get("type", "")
+            p_desc = (schema.get("description") or "").removesuffix(".")
+
+            if "enum" in schema:
+                func_token_count += enum_init
+                for item in schema["enum"]:
+                    func_token_count += enum_item
+                    # Enum members may be numbers or booleans; the encoder
+                    # only accepts strings.
+                    func_token_count += len(encoding.encode(str(item)))
+
+            func_token_count += len(
+                encoding.encode(f"{p_name}:{p_type}:{p_desc}"),
+            )
+
+    # The closing overhead is added once for the whole tools list.
+    func_token_count += func_end
+
+    return func_token_count
+
+
+def _count_content_tokens_for_openai_vision_model(
+    model_name: str,
+    content: list[dict],
+    encoding: Any,
+) -> int:
+    """Count the tokens of a multimodal `content` list of an OpenAI vision
+    model. Implemented according to
+    https://platform.openai.com/docs/guides/vision.
+
+    Args:
+        model_name (`str`):
+            The name of the model, which selects the image counting rule.
+        content (`list[dict]`):
+            A list of content blocks of type `text` or `image_url`.
+        encoding (`Any`):
+            The encoding object used to tokenize the text blocks.
+
+    Example:
+        .. code-block:: python
+
+            _count_content_tokens_for_openai_vision_model(
+                "gpt-4o",
+                [
+                    {
+                        "type": "text",
+                        "text": "xxx",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "xxx",
+                            "detail": "auto",
+                        }
+                    },
+                    # ...
+                ],
+                encoding,
+            )
+
+    Returns:
+        `int`:
+            The summed token count of all blocks. An image contributes
+            nothing when the model name matches neither prefix group.
+
+    Raises:
+        `ValueError`:
+            If a block has an unsupported `type` or an image has an
+            unsupported `detail` value.
+    """
+    # Models whose images are counted in 32px patches.
+    patch_based_prefixes = ("gpt-4.1-mini", "gpt-4.1-nano", "o4-mini")
+    # Models whose images are counted in base tokens plus 512px tiles.
+    tile_based_prefixes = ("gpt-4o", "gpt-4.1", "gpt-4o-mini", "o")
+
+    total = 0
+    for block in content:
+        assert isinstance(block, dict), (
+            "The content field should be a list of dictionaries, but got "
+            f"{type(block)}."
+        )
+
+        block_type = block.get("type", None)
+
+        if block_type == "text":
+            total += len(encoding.encode(block["text"]))
+            continue
+
+        if block_type != "image_url":
+            raise ValueError(
+                "The type field currently only supports 'text' "
+                f"and 'image_url', but got {block_type}.",
+            )
+
+        image_spec = block["image_url"]
+        # The size is fetched before the model check, as in every image case.
+        width, height = _get_size_of_image_url(image_spec["url"])
+
+        if model_name.startswith(patch_based_prefixes):
+            patches = min(
+                math.ceil(width / 32) * math.ceil(height / 32),
+                1536,
+            )
+            if model_name.startswith("gpt-4.1-mini"):
+                multiplier = 1.62
+            elif model_name.startswith("gpt-4.1-nano"):
+                multiplier = 2.46
+            else:
+                multiplier = 1.72
+            total += math.ceil(patches * multiplier)
+
+        elif model_name.startswith(tile_based_prefixes):
+            base_tokens, tile_tokens = _get_base_and_tile_tokens(
+                model_name,
+            )
+
+            # Default to "high" so that tokens are not undercounted.
+            detail = image_spec.get("detail", "high")
+            if detail == "low":
+                total += base_tokens
+            elif detail in ["auto", "high"]:
+                total += _calculate_tokens_for_high_quality_image(
+                    base_tokens,
+                    tile_tokens,
+                    width,
+                    height,
+                )
+            else:
+                raise ValueError(
+                    f"Unsupported image detail {detail}, expected "
+                    f"one of ['low', 'auto', 'high'].",
+                )
+
+    return total
+
+
+class OpenAITokenCounter(TokenCounterBase):
+    """The OpenAI token counting class."""
+
+    def __init__(self, model_name: str) -> None:
+        """Initialize the OpenAI token counter.
+
+        Args:
+            model_name (`str`):
+                The name of the OpenAI model to use for token counting.
+        """
+        self.model_name = model_name
+
+    async def count(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict] | None = None,
+        **kwargs: Any,
+    ) -> int:
+        """Count the token numbers of the given messages.
+
+        .. note:: OpenAI hasn't provided an official guide for counting tokens
+         with tools. If you have any ideas, please open an issue on
+         our GitHub repository.
+
+        Args:
+            messages (`list[dict[str, Any]]`):
+                A list of dictionaries, where `role` and `content` fields are
+                required.
+            tools (`list[dict] | None`, defaults to `None`):
+                The tools JSON schemas, whose estimated token cost is added
+                to the result when given.
+            **kwargs (`Any`):
+                Accepted for interface compatibility; not used here.
+
+        Returns:
+            `int`:
+                The estimated number of tokens.
+
+        Raises:
+            `TypeError`:
+                If a message field holds a value that is neither a string,
+                `None`, a `content` list nor a `tool_calls` entry.
+        """
+        # Function-scope import: `tiktoken` is only needed when counting.
+        import tiktoken
+
+        try:
+            encoding = tiktoken.encoding_for_model(self.model_name)
+        except KeyError:
+            # Model name unknown to tiktoken: fall back to a fixed encoding.
+            encoding = tiktoken.get_encoding("o200k_base")
+
+        tokens_per_message = 3
+        tokens_per_name = 1
+
+        # every reply is primed with <|start|>assistant<|message|>
+        num_tokens = 3
+        for message in messages:
+            num_tokens += tokens_per_message
+            for key, value in message.items():
+                # Considering vision models
+                if key == "content" and isinstance(value, list):
+                    num_tokens += (
+                        _count_content_tokens_for_openai_vision_model(
+                            self.model_name,
+                            value,
+                            encoding,
+                        )
+                    )
+
+                elif isinstance(value, str):
+                    num_tokens += len(encoding.encode(value))
+
+                elif value is None:
+                    continue
+
+                elif key == "tool_calls":
+                    # TODO: This is only a temporary solution, since OpenAI
+                    # hasn't provided an official guide for counting tokens
+                    # with tool results.
+                    num_tokens += len(
+                        encoding.encode(
+                            json.dumps(value, ensure_ascii=False),
+                        ),
+                    )
+
+                else:
+                    raise TypeError(
+                        f"Invalid type {type(value)} in the {key} field: "
+                        f"{value}",
+                    )
+
+                # A `name` field costs one extra token on top of its text.
+                if key == "name":
+                    num_tokens += tokens_per_name
+
+        if tools:
+            num_tokens += _calculate_tokens_for_tools(
+                self.model_name,
+                tools,
+                encoding,
+            )
+
+        return num_tokens
--- a/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_token_base.py
+++ b/backend_service/venv/lib/python3.13/site-packages/agentscope/token/_token_base.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+"""The token base class in agentscope."""
+from abc import abstractmethod
+from typing import Any
+
+
+class TokenCounterBase:
+    """The base class for token counting.
+
+    Subclasses provide the actual counting logic by overriding `count`.
+
+    NOTE(review): the class does not derive from `abc.ABC`, so the
+    `@abstractmethod` marker below is not enforced when instantiating;
+    confirm whether that is intended.
+    """
+
+    @abstractmethod
+    async def count(
+        self,
+        messages: list[dict],
+        **kwargs: Any,
+    ) -> int:
+        """Count the number of tokens by the given model and messages.
+
+        Args:
+            messages (`list[dict]`):
+                The messages whose tokens are counted.
+            **kwargs (`Any`):
+                Additional keyword arguments defined by the subclass.
+
+        Returns:
+            `int`:
+                The number of tokens.
+        """