chore: add the virtual environment to the repository
- Add the backend_service/venv virtual environment
- Includes all Python dependency packages
- Note: the virtual environment is about 393 MB and contains 12,655 files
@@ -0,0 +1,407 @@
# Copyright (c) Alibaba, Inc. and its affiliates.

import copy
import json
from typing import Any, Dict, Generator, List, Union, AsyncGenerator

from dashscope.api_entities.dashscope_response import (GenerationResponse,
                                                       Message, Role)
from dashscope.client.base_api import BaseAioApi, BaseApi
from dashscope.common.constants import (CUSTOMIZED_MODEL_ID,
                                        DEPRECATED_MESSAGE, HISTORY, MESSAGES,
                                        PROMPT)
from dashscope.common.error import InputRequired, ModelRequired
from dashscope.common.logging import logger
from dashscope.common.utils import _get_task_group_and_task
from dashscope.utils.param_utils import ParamUtil
from dashscope.utils.message_utils import merge_single_response


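# Editorial note (not part of the original module): ParamUtil.should_modify_incremental_output
# and merge_single_response are helpers from this repository's dashscope.utils package. Below,
# they are used to transparently switch qualifying streaming calls to incremental_output=True
# and then merge the incremental chunks back into cumulative responses.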
class Generation(BaseApi):
    task = 'text-generation'
    """API for AI-Generated Content(AIGC) models.

    """
    class Models:
        """@deprecated, use qwen_turbo instead"""
        qwen_v1 = 'qwen-v1'
        """@deprecated, use qwen_plus instead"""
        qwen_plus_v1 = 'qwen-plus-v1'

        bailian_v1 = 'bailian-v1'
        dolly_12b_v2 = 'dolly-12b-v2'
        qwen_turbo = 'qwen-turbo'
        qwen_plus = 'qwen-plus'
        qwen_max = 'qwen-max'

    @classmethod
    def call(
        cls,
        model: str,
        prompt: Any = None,
        history: list = None,
        api_key: str = None,
        messages: List[Message] = None,
        plugins: Union[str, Dict[str, Any]] = None,
        workspace: str = None,
        **kwargs
    ) -> Union[GenerationResponse, Generator[GenerationResponse, None, None]]:
        """Call generation model service.

        Args:
            model (str): The requested model, such as qwen-turbo.
            prompt (Any): The input prompt.
            history (list): The user provided history, deprecated
                examples:
                    [{'user': 'The weather is fine today.',
                      'bot': 'Suitable for outings'}].
                Defaults to None.
            api_key (str, optional): The api key, can be None,
                if None, will get by default rule(TODO: api key doc).
            messages (list): The generation messages.
                examples:
                    [{'role': 'user',
                      'content': 'The weather is fine today.'},
                     {'role': 'assistant', 'content': 'Suitable for outings'}]
            plugins (Any): The plugin config. Can be a plugins config str or dict.
            **kwargs:
                stream(bool, `optional`): Enable server-sent events
                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
                    the result is returned partially[qwen-turbo,bailian-v1].
                temperature(float, `optional`): Used to control the degree
                    of randomness and diversity. Specifically, the temperature
                    value controls the degree to which the probability distribution
                    of each candidate word is smoothed when generating text.
                    A higher temperature value will reduce the peak value of
                    the probability, allowing more low-probability words to be
                    selected, and the generated results will be more diverse;
                    while a lower temperature value will enhance the peak value
                    of the probability, making it easier for high-probability
                    words to be selected, the generated results are more
                    deterministic, range(0, 2). [qwen-turbo,qwen-plus].
                top_p(float, `optional`): A sampling strategy, called nucleus
                    sampling, where the model considers the results of the
                    tokens with top_p probability mass. So 0.1 means only
                    the tokens comprising the top 10% probability mass are
                    considered[qwen-turbo,bailian-v1].
                top_k(int, `optional`): The size of the sample candidate set when generating. # noqa E501
                    For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
                    in a single generation form a randomly sampled candidate set. # noqa E501
                    The larger the value, the higher the randomness generated; # noqa E501
                    the smaller the value, the higher the certainty generated. # noqa E501
                    The default value is 0, which means the top_k policy is # noqa E501
                    not enabled; in that case, only the top_p policy takes effect. # noqa E501
                enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
                    Currently works best only on the first round of conversation.
                    Defaults to False, support model: [qwen-turbo].
                customized_model_id(str, required): The enterprise-specific
                    large model id, which needs to be generated from the
                    operation background of the enterprise-specific
                    large model product, support model: [bailian-v1].
                result_format(str, `optional`): [message|text] Set the result format. # noqa E501
                    Default is text.
                incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
                    If false, each subsequent output includes the content that has # noqa E501
                    already been output; if true, each output only contains the newly # noqa E501
                    generated increment. Default false.
                stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
                    when encountering the set strings or token ids; the result will not include # noqa E501
                    the stop words or tokens.
                max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
                    noted that the length generated by the model will only be less than max_tokens, # noqa E501
                    not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
                    directly prompt that the length exceeds the limit. It is generally # noqa E501
                    not recommended to set this value.
                repetition_penalty(float, `optional`): Used to control repetition when generating. # noqa E501
                    Increasing repetition_penalty can reduce the duplication in model generation. # noqa E501
                    1.0 means no penalty.
            workspace (str): The dashscope workspace id.

        Raises:
            InputRequired: Neither prompt nor messages was provided.
            ModelRequired: No model was provided.

        Returns:
            Union[GenerationResponse,
                  Generator[GenerationResponse, None, None]]: If
            stream is True, return a Generator, otherwise a GenerationResponse.
        """
        if (prompt is None or not prompt) and (messages is None
                                               or not messages):
            raise InputRequired('prompt or messages is required!')
        if model is None or not model:
            raise ModelRequired('Model is required!')
        task_group, function = _get_task_group_and_task(__name__)
        if plugins is not None:
            headers = kwargs.pop('headers', {})
            if isinstance(plugins, str):
                headers['X-DashScope-Plugin'] = plugins
            else:
                headers['X-DashScope-Plugin'] = json.dumps(plugins)
            kwargs['headers'] = headers
        input, parameters = cls._build_input_parameters(
            model, prompt, history, messages, **kwargs)

        is_stream = parameters.get('stream', False)
        # Check if we need to merge incremental output
        is_incremental_output = kwargs.get('incremental_output', None)
        to_merge_incremental_output = False
        if (ParamUtil.should_modify_incremental_output(model) and
                is_stream and is_incremental_output is False):
            to_merge_incremental_output = True
            parameters['incremental_output'] = True

        # Pass incremental_to_full flag via headers user-agent
        if 'headers' not in parameters:
            parameters['headers'] = {}
        flag = '1' if to_merge_incremental_output else '0'
        parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'

        response = super().call(model=model,
                                task_group=task_group,
                                task=Generation.task,
                                function=function,
                                api_key=api_key,
                                input=input,
                                workspace=workspace,
                                **parameters)
        if is_stream:
            if to_merge_incremental_output:
                # Extract n parameter for merge logic
                n = parameters.get('n', 1)
                return cls._merge_generation_response(response, n)
            else:
                return (GenerationResponse.from_api_response(rsp)
                        for rsp in response)
        else:
            return GenerationResponse.from_api_response(response)

    @classmethod
    def _build_input_parameters(cls, model, prompt, history, messages,
                                **kwargs):
        if model == Generation.Models.qwen_v1:
            logger.warning(
                'Model %s is deprecated, use %s instead!' %
                (Generation.Models.qwen_v1, Generation.Models.qwen_turbo))
        if model == Generation.Models.qwen_plus_v1:
            logger.warning(
                'Model %s is deprecated, use %s instead!' %
                (Generation.Models.qwen_plus_v1, Generation.Models.qwen_plus))
        parameters = {}
        input = {}
        if history is not None:
            logger.warning(DEPRECATED_MESSAGE)
            input[HISTORY] = history
            if prompt is not None and prompt:
                input[PROMPT] = prompt
        elif messages is not None:
            msgs = copy.deepcopy(messages)
            if prompt is not None and prompt:
                msgs.append({'role': Role.USER, 'content': prompt})
            input = {MESSAGES: msgs}
        else:
            input[PROMPT] = prompt

        if model.startswith('qwen'):
            enable_search = kwargs.pop('enable_search', False)
            if enable_search:
                parameters['enable_search'] = enable_search
        elif model.startswith('bailian'):
            customized_model_id = kwargs.pop('customized_model_id', None)
            if customized_model_id is None:
                raise InputRequired('customized_model_id is required for %s' %
                                    model)
            input[CUSTOMIZED_MODEL_ID] = customized_model_id

        return input, {**parameters, **kwargs}

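    # Illustrative sketch (not part of the original file), assuming the
    # constants MESSAGES == 'messages' and PROMPT == 'prompt':
    #
    #     input, parameters = Generation._build_input_parameters(
    #         'qwen-turbo', None, None,
    #         [{'role': 'user', 'content': 'Hi'}],
    #         stream=True, result_format='message')
    #     # input      -> {'messages': [{'role': 'user', 'content': 'Hi'}]}
    #     # parameters -> {'stream': True, 'result_format': 'message'}
    #
    # Recognized options such as enable_search are popped out of **kwargs into
    # `parameters`; any remaining keyword arguments are passed through unchanged.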
    @classmethod
    def _merge_generation_response(cls, response, n=1) -> Generator[GenerationResponse, None, None]:
        """Merge incremental response chunks to simulate non-incremental output."""
        accumulated_data = {}
        for rsp in response:
            parsed_response = GenerationResponse.from_api_response(rsp)
            result = merge_single_response(parsed_response, accumulated_data, n)
            if result is True:
                yield parsed_response
            elif isinstance(result, list):
                # Multiple responses to yield (for n>1 non-stop cases)
                for resp in result:
                    yield resp


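# Usage sketch (illustrative, not part of the original file). It assumes a valid
# API key is configured, e.g. via the DASHSCOPE_API_KEY environment variable, and
# that the class is importable from the top-level dashscope package:
#
#     from dashscope import Generation
#
#     # Blocking call: a single GenerationResponse is returned.
#     rsp = Generation.call(model=Generation.Models.qwen_turbo,
#                           messages=[{'role': 'user', 'content': 'Hello'}],
#                           result_format='message')
#     print(rsp.output)
#
#     # Streaming call: a generator of GenerationResponse chunks. For models where
#     # ParamUtil.should_modify_incremental_output(...) returns True and
#     # incremental_output=False, the chunks are merged by _merge_generation_response,
#     # so each chunk carries the full output generated so far.
#     for chunk in Generation.call(model=Generation.Models.qwen_turbo,
#                                  prompt='Hello',
#                                  stream=True,
#                                  incremental_output=False):
#         print(chunk.output)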
class AioGeneration(BaseAioApi):
    task = 'text-generation'
    """API for AI-Generated Content(AIGC) models.

    """
    class Models:
        """@deprecated, use qwen_turbo instead"""
        qwen_v1 = 'qwen-v1'
        """@deprecated, use qwen_plus instead"""
        qwen_plus_v1 = 'qwen-plus-v1'

        bailian_v1 = 'bailian-v1'
        dolly_12b_v2 = 'dolly-12b-v2'
        qwen_turbo = 'qwen-turbo'
        qwen_plus = 'qwen-plus'
        qwen_max = 'qwen-max'

    @classmethod
    async def call(
        cls,
        model: str,
        prompt: Any = None,
        history: list = None,
        api_key: str = None,
        messages: List[Message] = None,
        plugins: Union[str, Dict[str, Any]] = None,
        workspace: str = None,
        **kwargs
    ) -> Union[GenerationResponse, AsyncGenerator[GenerationResponse, None]]:
        """Call generation model service.

        Args:
            model (str): The requested model, such as qwen-turbo.
            prompt (Any): The input prompt.
            history (list): The user provided history, deprecated
                examples:
                    [{'user': 'The weather is fine today.',
                      'bot': 'Suitable for outings'}].
                Defaults to None.
            api_key (str, optional): The api key, can be None,
                if None, will get by default rule(TODO: api key doc).
            messages (list): The generation messages.
                examples:
                    [{'role': 'user',
                      'content': 'The weather is fine today.'},
                     {'role': 'assistant', 'content': 'Suitable for outings'}]
            plugins (Any): The plugin config. Can be a plugins config str or dict.
            **kwargs:
                stream(bool, `optional`): Enable server-sent events
                    (ref: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events) # noqa E501
                    the result is returned partially[qwen-turbo,bailian-v1].
                temperature(float, `optional`): Used to control the degree
                    of randomness and diversity. Specifically, the temperature
                    value controls the degree to which the probability distribution
                    of each candidate word is smoothed when generating text.
                    A higher temperature value will reduce the peak value of
                    the probability, allowing more low-probability words to be
                    selected, and the generated results will be more diverse;
                    while a lower temperature value will enhance the peak value
                    of the probability, making it easier for high-probability
                    words to be selected, the generated results are more
                    deterministic, range(0, 2). [qwen-turbo,qwen-plus].
                top_p(float, `optional`): A sampling strategy, called nucleus
                    sampling, where the model considers the results of the
                    tokens with top_p probability mass. So 0.1 means only
                    the tokens comprising the top 10% probability mass are
                    considered[qwen-turbo,bailian-v1].
                top_k(int, `optional`): The size of the sample candidate set when generating. # noqa E501
                    For example, when the value is 50, only the 50 highest-scoring tokens # noqa E501
                    in a single generation form a randomly sampled candidate set. # noqa E501
                    The larger the value, the higher the randomness generated; # noqa E501
                    the smaller the value, the higher the certainty generated. # noqa E501
                    The default value is 0, which means the top_k policy is # noqa E501
                    not enabled; in that case, only the top_p policy takes effect. # noqa E501
                enable_search(bool, `optional`): Whether to enable web search(quark). # noqa E501
                    Currently works best only on the first round of conversation.
                    Defaults to False, support model: [qwen-turbo].
                customized_model_id(str, required): The enterprise-specific
                    large model id, which needs to be generated from the
                    operation background of the enterprise-specific
                    large model product, support model: [bailian-v1].
                result_format(str, `optional`): [message|text] Set the result format. # noqa E501
                    Default is text.
                incremental_output(bool, `optional`): Used to control the streaming output mode. # noqa E501
                    If false, each subsequent output includes the content that has # noqa E501
                    already been output; if true, each output only contains the newly # noqa E501
                    generated increment. Default false.
                stop(list[str] or list[list[int]], `optional`): Used to control the generation to stop # noqa E501
                    when encountering the set strings or token ids; the result will not include # noqa E501
                    the stop words or tokens.
                max_tokens(int, `optional`): The maximum token num expected to be output. It should be # noqa E501
                    noted that the length generated by the model will only be less than max_tokens, # noqa E501
                    not necessarily equal to it. If max_tokens is set too large, the service will # noqa E501
                    directly prompt that the length exceeds the limit. It is generally # noqa E501
                    not recommended to set this value.
                repetition_penalty(float, `optional`): Used to control repetition when generating. # noqa E501
                    Increasing repetition_penalty can reduce the duplication in model generation. # noqa E501
                    1.0 means no penalty.
            workspace (str): The dashscope workspace id.

        Raises:
            InputRequired: Neither prompt nor messages was provided.
            ModelRequired: No model was provided.

        Returns:
            Union[GenerationResponse,
                  AsyncGenerator[GenerationResponse, None]]: If
            stream is True, return an AsyncGenerator, otherwise a GenerationResponse.
        """
        if (prompt is None or not prompt) and (messages is None
                                               or not messages):
            raise InputRequired('prompt or messages is required!')
        if model is None or not model:
            raise ModelRequired('Model is required!')
        task_group, function = _get_task_group_and_task(__name__)
        if plugins is not None:
            headers = kwargs.pop('headers', {})
            if isinstance(plugins, str):
                headers['X-DashScope-Plugin'] = plugins
            else:
                headers['X-DashScope-Plugin'] = json.dumps(plugins)
            kwargs['headers'] = headers
        input, parameters = Generation._build_input_parameters(
            model, prompt, history, messages, **kwargs)

        is_stream = parameters.get('stream', False)
        # Check if we need to merge incremental output
        is_incremental_output = kwargs.get('incremental_output', None)
        to_merge_incremental_output = False
        if (ParamUtil.should_modify_incremental_output(model) and
                is_stream and is_incremental_output is False):
            to_merge_incremental_output = True
            parameters['incremental_output'] = True

        # Pass incremental_to_full flag via headers user-agent
        if 'headers' not in parameters:
            parameters['headers'] = {}
        flag = '1' if to_merge_incremental_output else '0'
        parameters['headers']['user-agent'] = f'incremental_to_full/{flag}'

        response = await super().call(model=model,
                                      task_group=task_group,
                                      task=Generation.task,
                                      function=function,
                                      api_key=api_key,
                                      input=input,
                                      workspace=workspace,
                                      **parameters)
        if is_stream:
            if to_merge_incremental_output:
                # Extract n parameter for merge logic
                n = parameters.get('n', 1)
                return cls._merge_generation_response(response, n)
            else:
                return cls._stream_responses(response)
        else:
            return GenerationResponse.from_api_response(response)

    @classmethod
    async def _stream_responses(cls, response) -> AsyncGenerator[GenerationResponse, None]:
        """Convert async response stream to GenerationResponse stream."""
        # Type hint: when stream=True, response is actually an AsyncIterable
        async for rsp in response:  # type: ignore
            yield GenerationResponse.from_api_response(rsp)

    @classmethod
    async def _merge_generation_response(cls, response, n=1) -> AsyncGenerator[GenerationResponse, None]:
        """Async version of merge incremental response chunks."""
        accumulated_data = {}

        async for rsp in response:  # type: ignore
            parsed_response = GenerationResponse.from_api_response(rsp)
            result = merge_single_response(parsed_response, accumulated_data, n)
            if result is True:
                yield parsed_response
            elif isinstance(result, list):
                # Multiple responses to yield (for n>1 non-stop cases)
                for resp in result:
                    yield resp
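# Async usage sketch (illustrative, not part of the original file); it makes the
# same assumptions as the synchronous example above:
#
#     import asyncio
#
#     from dashscope import AioGeneration
#
#     async def main():
#         # Non-streaming: await returns a single GenerationResponse.
#         rsp = await AioGeneration.call(model=AioGeneration.Models.qwen_turbo,
#                                        prompt='Hello')
#         print(rsp.output)
#
#         # Streaming: await returns an AsyncGenerator of GenerationResponse chunks.
#         stream = await AioGeneration.call(model=AioGeneration.Models.qwen_turbo,
#                                           prompt='Hello',
#                                           stream=True)
#         async for chunk in stream:
#             print(chunk.output)
#
#     asyncio.run(main())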