chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境 - 包含所有Python依赖包 - 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from .asr_phrase_manager import AsrPhraseManager
|
||||
from .recognition import Recognition, RecognitionCallback, RecognitionResult
|
||||
from .transcription import Transcription
|
||||
from .translation_recognizer import (TranscriptionResult, Translation,
|
||||
TranslationRecognizerCallback,
|
||||
TranslationRecognizerChat,
|
||||
TranslationRecognizerRealtime,
|
||||
TranslationRecognizerResultPack,
|
||||
TranslationResult)
|
||||
from .vocabulary import VocabularyService, VocabularyServiceException
|
||||
|
||||
__all__ = [
|
||||
'Transcription', 'Recognition', 'RecognitionCallback', 'RecognitionResult',
|
||||
'AsrPhraseManager', 'VocabularyServiceException', 'VocabularyService',
|
||||
'TranslationRecognizerRealtime', 'TranslationRecognizerChat',
|
||||
'TranslationRecognizerCallback', 'Translation', 'TranslationResult',
|
||||
'TranscriptionResult', 'TranslationRecognizerResultPack'
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,203 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from http import HTTPStatus
|
||||
from typing import Any, Dict
|
||||
|
||||
from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
|
||||
from dashscope.client.base_api import BaseAsyncApi
|
||||
from dashscope.common.error import InvalidParameter
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.customize.finetunes import FineTunes
|
||||
|
||||
|
||||
class AsrPhraseManager(BaseAsyncApi):
    """Hot word (phrase) management for speech recognition.

    Phrase CRUD is carried over the fine-tune API endpoints: every method
    temporarily redirects ``FineTunes.SUB_PATH`` to the appropriate
    sub-path and restores it afterwards.

    NOTE(review): mutating the class attribute ``FineTunes.SUB_PATH`` is
    not thread-safe; concurrent callers of ``FineTunes`` may observe the
    temporary value. The swap is now exception-safe (restored via
    ``finally``), which the original code was not.
    """
    @classmethod
    def _call_with_sub_path(cls, sub_path, func, **call_kwargs):
        """Invoke ``func(**call_kwargs)`` with ``FineTunes.SUB_PATH``
        temporarily set to ``sub_path``.

        The original value is restored even if ``func`` raises, so a
        failing request can no longer leave ``FineTunes`` pointing at the
        phrase endpoints.
        """
        original_ft_sub_path = FineTunes.SUB_PATH
        FineTunes.SUB_PATH = sub_path
        try:
            return func(**call_kwargs)
        finally:
            FineTunes.SUB_PATH = original_ft_sub_path

    @classmethod
    def create_phrases(cls,
                       model: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Create hot words.

        Args:
            model (str): The requested model.
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'下一首':90,'上一首':90}.
            training_type (str, `optional`): The training type,
                'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of creating hot words.
        """
        if phrases is None or len(phrases) == 0:
            raise InvalidParameter('phrases is empty!')
        if training_type is None or len(training_type) == 0:
            raise InvalidParameter('training_type is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes',
            FineTunes.call,
            model=model,
            training_file_ids=[],
            validation_file_ids=[],
            mode=training_type,
            hyper_parameters={'phrase_list': phrases},
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Create phrase failed, ' + str(response))

        return response

    @classmethod
    def update_phrases(cls,
                       model: str,
                       phrase_id: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Update the hot words marked phrase_id.

        Args:
            model (str): The requested model.
            phrase_id (str): The ID of phrases,
                which created by create_phrases().
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'暂停':90}.
            training_type (str, `optional`):
                The training type, 'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of updating hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')
        if phrases is None or len(phrases) == 0:
            raise InvalidParameter('phrases is empty!')
        if training_type is None or len(training_type) == 0:
            raise InvalidParameter('training_type is empty!')

        # `finetuned_output` routes the update onto the existing phrase set.
        response = cls._call_with_sub_path(
            'fine-tunes',
            FineTunes.call,
            model=model,
            training_file_ids=[],
            validation_file_ids=[],
            mode=training_type,
            hyper_parameters={'phrase_list': phrases},
            finetuned_output=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Update phrase failed, ' + str(response))

        return response

    @classmethod
    def query_phrases(cls,
                      phrase_id: str,
                      workspace: str = None,
                      **kwargs) -> DashScopeAPIResponse:
        """Query the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which created by create_phrases().
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of querying hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.get,
            job_id=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Query phrase failed, ' + str(response))

        return response

    @classmethod
    def list_phrases(cls,
                     page: int = 1,
                     page_size: int = 10,
                     workspace: str = None,
                     **kwargs) -> DashScopeAPIResponse:
        """List all information of phrases.

        Args:
            page (int): Page number, greater than 0, default value 1.
            page_size (int): The paging size, greater than 0
                and less than or equal to 100, default value 10.
            workspace (str): The dashscope workspace id.

        Returns:
            DashScopeAPIResponse: The results of listing hot words.
        """
        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.list,
            page=page,
            page_size=page_size,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('List phrase failed, ' + str(response))

        return response

    @classmethod
    def delete_phrases(cls,
                       phrase_id: str,
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Delete the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which created by create_phrases().

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of deleting hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.delete,
            job_id=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Delete phrase failed, ' + str(response))

        return response
|
||||
@@ -0,0 +1,527 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from http import HTTPStatus
|
||||
from queue import Queue
|
||||
from threading import Timer
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from dashscope.api_entities.dashscope_response import RecognitionResponse
|
||||
from dashscope.client.base_api import BaseApi
|
||||
from dashscope.common.constants import ApiProtocol
|
||||
from dashscope.common.error import (InputDataRequired, InputRequired,
|
||||
InvalidParameter, InvalidTask,
|
||||
ModelRequired)
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.common.utils import _get_task_group_and_task
|
||||
from dashscope.protocol.websocket import WebsocketStreamingMode
|
||||
|
||||
|
||||
class RecognitionResult(RecognitionResponse):
    """The result set of speech recognition: either the single-sentence
    result delivered through the callback interface, or the collected
    results of a synchronous call.
    """
    def __init__(self,
                 response: RecognitionResponse,
                 sentences: List[Any] = None,
                 usages: List[Any] = None):
        # Mirror the transport-level fields of the raw response.
        self.status_code = response.status_code
        self.request_id = response.request_id
        self.code = response.code
        self.message = response.message
        self.usages = usages
        # Prefer explicitly collected sentences over the raw output payload.
        self.output = {'sentence': sentences} if sentences else response.output
        # The most recent usage record (when present) becomes the headline
        # usage of this result.
        newest = self.usages[-1] if self.usages else None
        self.usage = newest['usage'] if newest is not None and 'usage' in newest else None

    def __str__(self):
        as_response = RecognitionResponse.from_api_response(self)
        return json.dumps(as_response, ensure_ascii=False)

    def get_sentence(self) -> Union[Dict[str, Any], List[Any]]:
        """Return the recognized sentence payload, or None if absent."""
        if not self.output or 'sentence' not in self.output:
            return None
        return self.output['sentence']

    def get_request_id(self) -> str:
        """Return the request_id of this speech recognition result."""
        return self.request_id

    def get_usage(self, sentence: Dict[str, Any]) -> Dict[str, Any]:
        """Return the billing record matching *sentence* (by end_time),
        or None when unavailable."""
        if self.usages is None:
            return None
        if sentence is None or sentence.get('end_time') is None:
            return None
        for record in self.usages:
            if record['end_time'] == sentence['end_time']:
                return record['usage']
        return None

    @staticmethod
    def is_sentence_end(sentence: Dict[str, Any]) -> bool:
        """Return True when *sentence* carries a final end_time, i.e. the
        recognition result is a complete sentence. Static method."""
        return sentence is not None and sentence.get('end_time') is not None
|
||||
|
||||
|
||||
class RecognitionCallback():
    """An interface that defines callback methods for getting speech recognition results. # noqa E501
    Derive from this class and implement its function to provide your own data.
    """
    def on_open(self) -> None:
        """Invoked once the recognition session has been opened."""
        pass

    def on_complete(self) -> None:
        """Invoked when the recognition stream finishes normally."""
        pass

    def on_error(self, result: RecognitionResult) -> None:
        """Invoked when recognition fails; *result* carries the error."""
        pass

    def on_close(self) -> None:
        """Invoked when the recognition session is closed."""
        pass

    def on_event(self, result: RecognitionResult) -> None:
        """Invoked for each intermediate/final recognition result."""
        pass
|
||||
|
||||
|
||||
class Recognition(BaseApi):
    """Speech recognition interface.

    Args:
        model (str): The requested model_id.
        callback (RecognitionCallback): A callback that returns
            speech recognition results.
        format (str): The input audio format for speech recognition.
        sample_rate (int): The input audio sample rate for speech recognition.
        workspace (str): The dashscope workspace id.

        **kwargs:
            phrase_id (list, `optional`): The ID of phrase.
            disfluency_removal_enabled(bool, `optional`): Filter mood words,
                turned off by default.
            diarization_enabled (bool, `optional`): Speech auto diarization,
                turned off by default.
            speaker_count (int, `optional`): The number of speakers.
            timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
                calibration, turned off by default.
            special_word_filter(str, `optional`): Sensitive word filter.
            audio_event_detection_enabled(bool, `optional`):
                Audio event detection, turned off by default.

    Raises:
        InputRequired: Input is required.
    """

    # Abort the session when no audio is received for this many seconds.
    SILENCE_TIMEOUT_S = 23

    def __init__(self,
                 model: str,
                 callback: RecognitionCallback,
                 format: str,
                 sample_rate: int,
                 workspace: str = None,
                 **kwargs):
        if model is None:
            raise ModelRequired('Model is required!')
        if format is None:
            raise InputRequired('format is required!')
        if sample_rate is None:
            raise InputRequired('sample_rate is required!')

        self.model = model
        self.format = format
        self.sample_rate = sample_rate
        # continuous recognition with start() or once recognition with call()
        self._recognition_once = False
        self._callback = callback
        self._running = False
        # Audio frames queued for the websocket sender generator.
        self._stream_data = Queue()
        self._worker = None
        self._silence_timer = None
        self._kwargs = kwargs
        self._workspace = workspace
        # Millisecond timestamps; -1 means "not recorded yet". Used by
        # get_first_package_delay()/get_last_package_delay().
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        # A locally generated id is used until the server confirms the real
        # request id in the first response.
        self.request_id_confirmed = False
        self.last_request_id = uuid.uuid4().hex

    def __del__(self):
        # Best-effort cleanup in case the user never called stop().
        if self._running:
            self._running = False
            self._stream_data = Queue()
            if self._worker is not None and self._worker.is_alive():
                self._worker.join()
            if self._silence_timer is not None and self._silence_timer.is_alive(  # noqa E501
            ):
                self._silence_timer.cancel()
                self._silence_timer = None
            if self._callback:
                self._callback.on_close()

    def __receive_worker(self):
        """Asynchronously, initiate a real-time speech recognition request and
        obtain the result for parsing.
        """
        responses = self.__launch_request()
        for part in responses:
            if part.status_code == HTTPStatus.OK:
                # Empty output or an explicit 'finished' flag marks the end
                # of the recognition stream.
                if len(part.output) == 0 or ('finished' in part.output and part.output['finished'] == True):
                    self._on_complete_timestamp = time.time() * 1000
                    logger.debug('last package delay {}'.format(
                        self.get_last_package_delay()))
                    self._callback.on_complete()
                else:
                    usage: Dict[str, Any] = None
                    usages: List[Any] = None
                    if 'sentence' in part.output:
                        # Record arrival time of the very first result
                        # package for latency reporting.
                        if (self._first_package_timestamp < 0):
                            self._first_package_timestamp = time.time() * 1000
                            logger.debug('first package delay {}'.format(
                                self.get_first_package_delay()))
                        sentence = part.output['sentence']
                        # Heartbeat packets carry no recognition payload.
                        if 'heartbeat' in sentence and sentence['heartbeat'] == True:
                            logger.debug('recv heartbeat')
                            continue
                        logger.debug(
                            'Recv Result [rid:{}]:{}, isEnd: {}'.format(
                                part.request_id, sentence,
                                RecognitionResult.is_sentence_end(sentence)))
                        if part.usage is not None:
                            usage = {
                                'end_time':
                                part.output['sentence']['end_time'],
                                'usage': part.usage
                            }
                            usages = [usage]
                    # Remember the server-assigned request id from the first
                    # response that carries one.
                    if self.request_id_confirmed is False and part.request_id is not None:
                        self.last_request_id = part.request_id
                        self.request_id_confirmed = True

                    self._callback.on_event(
                        RecognitionResult(
                            RecognitionResponse.from_api_response(part),
                            usages=usages))
            else:
                # Transport/service error: stop streaming, drop queued audio,
                # notify the callback and close the session.
                self._running = False
                self._stream_data = Queue()
                self._callback.on_error(
                    RecognitionResult(
                        RecognitionResponse.from_api_response(part)))
                self._callback.on_close()
                break

    def __launch_request(self):
        """Initiate real-time speech recognition requests.
        """
        # Attach the hot-word phrase resource when one was supplied.
        resources_list: list = []
        if self._phrase is not None and len(self._phrase) > 0:
            item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
            resources_list.append(item)

        if len(resources_list) > 0:
            self._kwargs['resources'] = resources_list

        self._tidy_kwargs()
        task_name, _ = _get_task_group_and_task(__name__)
        # Duplex websocket call: audio frames are pulled lazily from
        # _input_stream_cycle() while results stream back.
        responses = super().call(model=self.model,
                                 task_group='audio',
                                 task=task_name,
                                 function='recognition',
                                 input=self._input_stream_cycle(),
                                 api_protocol=ApiProtocol.WEBSOCKET,
                                 ws_stream_mode=WebsocketStreamingMode.DUPLEX,
                                 is_binary_input=True,
                                 sample_rate=self.sample_rate,
                                 format=self.format,
                                 stream=True,
                                 workspace=self._workspace,
                                 pre_task_id=self.last_request_id,
                                 **self._kwargs)
        return responses

    def start(self, phrase_id: str = None, **kwargs):
        """Real-time speech recognition in asynchronous mode.
        Please call 'stop()' after you have completed recognition.

        Args:
            phrase_id (str, `optional`): The ID of phrase.

            **kwargs:
                disfluency_removal_enabled(bool, `optional`):
                    Filter mood words, turned off by default.
                diarization_enabled (bool, `optional`):
                    Speech auto diarization, turned off by default.
                speaker_count (int, `optional`): The number of speakers.
                timestamp_alignment_enabled (bool, `optional`):
                    Timestamp-alignment calibration, turned off by default.
                special_word_filter(str, `optional`): Sensitive word filter.
                audio_event_detection_enabled(bool, `optional`):
                    Audio event detection, turned off by default.

        Raises:
            InvalidParameter: This interface cannot be called again
                if it has already been started.
            InvalidTask: Task create failed.
        """
        assert self._callback is not None, 'Please set the callback to get the speech recognition result.'  # noqa E501

        if self._running:
            raise InvalidParameter('Speech recognition has started.')

        # Reset latency bookkeeping for the new session.
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        self._phrase = phrase_id
        self._kwargs.update(**kwargs)
        self._recognition_once = False
        self._worker = threading.Thread(target=self.__receive_worker)
        self._worker.start()
        if self._worker.is_alive():
            self._running = True
            self._callback.on_open()

            # If audio data is not received for 23 seconds, the timeout exits
            self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
                                        self._silence_stop_timer)
            self._silence_timer.start()
        else:
            self._running = False
            raise InvalidTask('Invalid task, task create failed.')

    def call(self,
             file: str,
             phrase_id: str = None,
             **kwargs) -> RecognitionResult:
        """Real-time speech recognition in synchronous mode.

        Args:
            file (str): The path to the local audio file.
            phrase_id (str, `optional`): The ID of phrase.

            **kwargs:
                disfluency_removal_enabled(bool, `optional`):
                    Filter mood words, turned off by default.
                diarization_enabled (bool, `optional`):
                    Speech auto diarization, turned off by default.
                speaker_count (int, `optional`): The number of speakers.
                timestamp_alignment_enabled (bool, `optional`):
                    Timestamp-alignment calibration, turned off by default.
                special_word_filter(str, `optional`): Sensitive word filter.
                audio_event_detection_enabled(bool, `optional`):
                    Audio event detection, turned off by default.

        Raises:
            InvalidParameter: This interface cannot be called again
                if it has already been started.
            InputDataRequired: The supplied file was empty.

        Returns:
            RecognitionResult: The result of speech recognition.
        """
        self._start_stream_timestamp = time.time() * 1000
        if self._running:
            raise InvalidParameter('Speech recognition has been called.')

        if os.path.exists(file):
            if os.path.isdir(file):
                raise IsADirectoryError('Is a directory: ' + file)
        else:
            raise FileNotFoundError('No such file or directory: ' + file)

        self._recognition_once = True
        self._stream_data = Queue()
        self._phrase = phrase_id
        self._kwargs.update(**kwargs)
        error_flag: bool = False
        sentences: List[Any] = []
        usages: List[Any] = []
        response: RecognitionResponse = None
        result: RecognitionResult = None

        try:
            # Pre-load the whole file into the stream queue in 12800-byte
            # chunks before launching the request.
            audio_data: bytes = None
            f = open(file, 'rb')
            if os.path.getsize(file):
                while True:
                    audio_data = f.read(12800)
                    if not audio_data:
                        break
                    else:
                        self._stream_data.put(audio_data)
            else:
                raise InputDataRequired(
                    'The supplied file was empty (zero bytes long)')
            f.close()
            self._stop_stream_timestamp = time.time() * 1000
        except Exception as e:
            logger.error(e)
            raise e

        if not self._stream_data.empty():
            self._running = True
            responses = self.__launch_request()
            for part in responses:
                if part.status_code == HTTPStatus.OK:
                    if 'sentence' in part.output:
                        if (self._first_package_timestamp < 0):
                            self._first_package_timestamp = time.time() * 1000
                            logger.debug('first package delay {}'.format(
                                self._first_package_timestamp -
                                self._start_stream_timestamp))
                        sentence = part.output['sentence']
                        logger.debug(
                            'Recv Result [rid:{}]:{}, isEnd: {}'.format(
                                part.request_id, sentence,
                                RecognitionResult.is_sentence_end(sentence)))
                        # Only completed sentences are collected for the
                        # final synchronous result.
                        if RecognitionResult.is_sentence_end(sentence):
                            sentences.append(sentence)

                            if part.usage is not None:
                                usage = {
                                    'end_time':
                                    part.output['sentence']['end_time'],
                                    'usage': part.usage
                                }
                                usages.append(usage)

                    response = RecognitionResponse.from_api_response(part)
                else:
                    response = RecognitionResponse.from_api_response(part)
                    logger.error(response)
                    error_flag = True
                    break

        self._on_complete_timestamp = time.time() * 1000
        logger.debug('last package delay {}'.format(
            self.get_last_package_delay()))

        if error_flag:
            result = RecognitionResult(response)
        else:
            result = RecognitionResult(response, sentences, usages)

        # Reset session state so the instance can be reused.
        self._stream_data = Queue()
        self._recognition_once = False
        self._running = False

        return result

    def stop(self):
        """End asynchronous speech recognition.

        Raises:
            InvalidParameter: Cannot stop an uninitiated recognition.
        """
        if self._running is False:
            raise InvalidParameter('Speech recognition has stopped.')

        self._stop_stream_timestamp = time.time() * 1000

        # Flipping _running lets _input_stream_cycle() drain and finish,
        # after which the worker thread ends.
        self._running = False
        if self._worker is not None and self._worker.is_alive():
            self._worker.join()
        self._stream_data = Queue()
        if self._silence_timer is not None and self._silence_timer.is_alive():
            self._silence_timer.cancel()
            self._silence_timer = None
        if self._callback:
            self._callback.on_close()

    def send_audio_frame(self, buffer: bytes):
        """Push speech recognition.

        Raises:
            InvalidParameter: Cannot send data to an uninitiated recognition.
        """
        if self._running is False:
            raise InvalidParameter('Speech recognition has stopped.')

        if (self._start_stream_timestamp < 0):
            self._start_stream_timestamp = time.time() * 1000
        logger.debug('send_audio_frame: {}'.format(len(buffer)))
        self._stream_data.put(buffer)

    def _tidy_kwargs(self):
        # Drop None-valued options so they are not sent to the service.
        for k in self._kwargs.copy():
            if self._kwargs[k] is None:
                self._kwargs.pop(k, None)

    def _input_stream_cycle(self):
        # Generator feeding audio frames to the websocket sender. Polls the
        # queue while the session is running and drains any remainder after
        # stop() flips _running.
        while self._running:
            while self._stream_data.empty():
                if self._running:
                    time.sleep(0.01)
                    continue
                else:
                    break

            # Reset silence_timer when getting stream.
            if self._silence_timer is not None and self._silence_timer.is_alive(  # noqa E501
            ):
                self._silence_timer.cancel()
                self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
                                            self._silence_stop_timer)
                self._silence_timer.start()

            while not self._stream_data.empty():
                frame = self._stream_data.get()
                yield bytes(frame)

            # In one-shot (call()) mode all audio was pre-queued, so a
            # single pass is enough.
            if self._recognition_once:
                self._running = False

        # drain all audio data when invoking stop().
        if self._recognition_once is False:
            while not self._stream_data.empty():
                frame = self._stream_data.get()
                yield bytes(frame)

    def _silence_stop_timer(self):
        """If audio data is not received for a long time, exit worker.
        """
        self._running = False
        if self._silence_timer is not None and self._silence_timer.is_alive():
            self._silence_timer.cancel()
        self._silence_timer = None
        if self._worker is not None and self._worker.is_alive():
            self._worker.join()
        self._stream_data = Queue()

    def get_first_package_delay(self):
        """First Package Delay is the time between start sending audio and receive first words package
        """
        return self._first_package_timestamp - self._start_stream_timestamp

    def get_last_package_delay(self):
        """Last Package Delay is the time between stop sending audio and receive last words package
        """
        return self._on_complete_timestamp - self._stop_stream_timestamp

    # Get the taskId of the previous task.
    def get_last_request_id(self):
        return self.last_request_id
|
||||
@@ -0,0 +1,231 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import List, Union
|
||||
|
||||
import aiohttp
|
||||
|
||||
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
|
||||
TranscriptionResponse)
|
||||
from dashscope.client.base_api import BaseAsyncApi
|
||||
from dashscope.common.constants import ApiProtocol, HTTPMethod
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.common.utils import _get_task_group_and_task
|
||||
|
||||
|
||||
class Transcription(BaseAsyncApi):
|
||||
"""API for File Transcription models.
|
||||
"""
|
||||
|
||||
    # Maximum number of retries after transient HTTP/timeout failures in
    # fetch() and _launch_request().
    MAX_QUERY_TRY_COUNT = 3

    class Models:
        """Known file-transcription model identifiers."""
        paraformer_v1 = 'paraformer-v1'
        paraformer_8k_v1 = 'paraformer-8k-v1'
        paraformer_mtl_v1 = 'paraformer-mtl-v1'
|
||||
|
||||
@classmethod
|
||||
def call(cls,
|
||||
model: str,
|
||||
file_urls: List[str],
|
||||
phrase_id: str = None,
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Transcribe the given files synchronously.
|
||||
|
||||
Args:
|
||||
model (str): The requested model_id.
|
||||
file_urls (List[str]): List of stored URLs.
|
||||
phrase_id (str, `optional`): The ID of phrase.
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
**kwargs:
|
||||
channel_id (List[int], optional):
|
||||
The selected channel_id of audio file.
|
||||
disfluency_removal_enabled(bool, `optional`):
|
||||
Filter mood words, turned off by default.
|
||||
diarization_enabled (bool, `optional`):
|
||||
Speech auto diarization, turned off by default.
|
||||
speaker_count (int, `optional`): The number of speakers.
|
||||
timestamp_alignment_enabled (bool, `optional`):
|
||||
Timestamp-alignment calibration, turned off by default.
|
||||
special_word_filter(str, `optional`): Sensitive word filter.
|
||||
audio_event_detection_enabled(bool, `optional`):
|
||||
Audio event detection, turned off by default.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The result of batch transcription.
|
||||
"""
|
||||
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
|
||||
kwargs = cls._tidy_kwargs(**kwargs)
|
||||
response = super().call(model,
|
||||
file_urls,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
|
||||
def async_call(cls,
|
||||
model: str,
|
||||
file_urls: List[str],
|
||||
phrase_id: str = None,
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Transcribe the given files asynchronously,
|
||||
return the status of task submission for querying results subsequently.
|
||||
|
||||
Args:
|
||||
model (str): The requested model, such as paraformer-16k-1
|
||||
file_urls (List[str]): List of stored URLs.
|
||||
phrase_id (str, `optional`): The ID of phrase.
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
**kwargs:
|
||||
channel_id (List[int], optional):
|
||||
The selected channel_id of audio file.
|
||||
disfluency_removal_enabled(bool, `optional`):
|
||||
Filter mood words, turned off by default.
|
||||
diarization_enabled (bool, `optional`):
|
||||
Speech auto diarization, turned off by default.
|
||||
speaker_count (int, `optional`): The number of speakers.
|
||||
timestamp_alignment_enabled (bool, `optional`):
|
||||
Timestamp-alignment calibration, turned off by default.
|
||||
special_word_filter(str, `optional`): Sensitive word filter.
|
||||
audio_event_detection_enabled(bool, `optional`):
|
||||
Audio event detection, turned off by default.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The response including task_id.
|
||||
"""
|
||||
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
|
||||
kwargs = cls._tidy_kwargs(**kwargs)
|
||||
response = cls._launch_request(model,
|
||||
file_urls,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
|
||||
def fetch(cls,
|
||||
task: Union[str, TranscriptionResponse],
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Fetch the status of task, including results of batch transcription when task_status is SUCCEEDED. # noqa: E501
|
||||
|
||||
Args:
|
||||
task (Union[str, TranscriptionResponse]): The task_id or
|
||||
response including task_id returned from async_call().
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The status of task_id,
|
||||
including results of batch transcription when task_status is SUCCEEDED.
|
||||
"""
|
||||
try_count: int = 0
|
||||
while True:
|
||||
try:
|
||||
response = super().fetch(task,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
|
||||
logger.error(e)
|
||||
try_count += 1
|
||||
if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
|
||||
time.sleep(2)
|
||||
continue
|
||||
|
||||
try_count = 0
|
||||
break
|
||||
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
def wait(cls,
         task: Union[str, TranscriptionResponse],
         api_key: str = None,
         workspace: str = None,
         **kwargs) -> TranscriptionResponse:
    """Block until the transcription task reaches its final state.

    Args:
        task (Union[str, TranscriptionResponse]): The task_id or
            response including task_id returned from async_call().
        api_key (str, `optional`): The DashScope api key.
        workspace (str): The dashscope workspace id.

    Returns:
        TranscriptionResponse: The result of batch transcription.
    """
    raw_response = super().wait(
        task, api_key=api_key, workspace=workspace, **kwargs)
    return TranscriptionResponse.from_api_response(raw_response)
|
||||
|
||||
@classmethod
def _launch_request(cls,
                    model: str,
                    files: List[str],
                    api_key: str = None,
                    workspace: str = None,
                    **kwargs) -> DashScopeAPIResponse:
    """Submit the transcription request, retrying transient network errors.

    Args:
        model (str): The requested model, such as paraformer-16k-1.
        files (List[str]): List of stored URLs.
        api_key (str, `optional`): The DashScope api key.
        workspace (str): The dashscope workspace id.

    Returns:
        DashScopeAPIResponse: The result of task submission.

    Raises:
        asyncio.TimeoutError, aiohttp.ClientConnectorError: If the
            submission still fails after MAX_QUERY_TRY_COUNT retries.
    """
    task_name, function = _get_task_group_and_task(__name__)

    try_count: int = 0
    while True:
        try:
            response = super().async_call(model=model,
                                          task_group='audio',
                                          task=task_name,
                                          function=function,
                                          input={'file_urls': files},
                                          api_protocol=ApiProtocol.HTTP,
                                          http_method=HTTPMethod.POST,
                                          api_key=api_key,
                                          workspace=workspace,
                                          **kwargs)
        except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
            logger.error(e)
            try_count += 1
            if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
                time.sleep(2)
                continue
            # Bug fix: the original broke out of the loop here with
            # `response` unbound, so the `return` below raised NameError.
            # Propagate the last network error instead.
            raise
        break

    return response
|
||||
|
||||
@classmethod
def _fill_resource_id(cls, phrase_id: str, **kwargs):
    """Attach the asr phrase resource to kwargs when a phrase_id is given.

    When `phrase_id` is a non-empty string, kwargs['resources'] is set to
    a single-item list describing it; otherwise kwargs is returned
    untouched.
    """
    if phrase_id:
        kwargs['resources'] = [{
            'resource_id': phrase_id,
            'resource_type': 'asr_phrase',
        }]
    return kwargs
|
||||
|
||||
@classmethod
def _tidy_kwargs(cls, **kwargs):
    """Return kwargs with every None-valued entry dropped."""
    return {key: value for key, value in kwargs.items() if value is not None}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,177 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import aiohttp
|
||||
|
||||
from dashscope.client.base_api import BaseApi
|
||||
from dashscope.common.constants import ApiProtocol, HTTPMethod
|
||||
from dashscope.common.logging import logger
|
||||
|
||||
|
||||
class VocabularyServiceException(Exception):
    """Raised when the vocabulary service returns an error response."""

    def __init__(self, request_id: str, status_code: int, code: str,
                 error_message: str) -> None:
        # Kept as private attributes; __str__ renders them for display.
        self._request_id = request_id
        self._status_code = status_code
        self._code = code
        self._error_message = error_message

    def __str__(self):
        details = [
            f'Request: {self._request_id}',
            f'Status Code: {self._status_code}',
            f'Code: {self._code}',
            f'Error Message: {self._error_message}',
        ]
        return ', '.join(details)
|
||||
|
||||
|
||||
class VocabularyService(BaseApi):
    """API for the asr vocabulary (hot-word) customization service."""

    # Maximum number of retries for transient network failures.
    MAX_QUERY_TRY_COUNT = 3

    def __init__(self,
                 api_key=None,
                 workspace=None,
                 model=None,
                 **kwargs) -> None:
        """
        Args:
            api_key (str, `optional`): The DashScope api key.
            workspace (str, `optional`): The DashScope workspace id.
            model (str, `optional`): The customization model name;
                defaults to 'speech-biasing'.
        """
        super().__init__()
        self._api_key = api_key
        self._workspace = workspace
        self._kwargs = kwargs
        self._last_request_id = None
        self.model = model if model is not None else 'speech-biasing'

    def __call_with_input(self, payload):
        """POST `payload` to the customization endpoint.

        Transient network failures are retried up to MAX_QUERY_TRY_COUNT
        times with a 2s pause between tries.

        Raises:
            asyncio.TimeoutError, aiohttp.ClientConnectorError: If the
                request still fails after all retries.
        """
        try_count = 0
        while True:
            try:
                response = super().call(model=self.model,
                                        task_group='audio',
                                        task='asr',
                                        function='customization',
                                        input=payload,
                                        api_protocol=ApiProtocol.HTTP,
                                        http_method=HTTPMethod.POST,
                                        api_key=self._api_key,
                                        workspace=self._workspace,
                                        **self._kwargs)
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count <= VocabularyService.MAX_QUERY_TRY_COUNT:
                    time.sleep(2)
                    continue
                # Bug fix: the original broke out of the loop here with
                # `response` unbound (NameError below); re-raise instead.
                raise
            break
        # Bug fix: lazy %-style logging needs a placeholder — the old
        # `logger.debug('>>>>recv', response)` raised a formatting error.
        logger.debug('>>>>recv %s', response)
        return response

    def _unwrap(self, response):
        """Record the request id and return response.output on success,
        otherwise raise VocabularyServiceException."""
        if response.status_code == 200:
            self._last_request_id = response.request_id
            return response.output
        raise VocabularyServiceException(response.request_id,
                                         response.status_code,
                                         response.code, response.message)

    def create_vocabulary(self, target_model: str, prefix: str,
                          vocabulary: List[dict]) -> str:
        """Create a vocabulary (hot-word list).

        Args:
            target_model: The speech recognition model version the
                vocabulary applies to.
            prefix: Custom vocabulary prefix; digits and lowercase
                letters only, fewer than ten characters.
            vocabulary: The vocabulary entries.

        Returns:
            The vocabulary identifier (vocabulary_id).

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'create_vocabulary',
            'target_model': target_model,
            'prefix': prefix,
            'vocabulary': vocabulary,
        })
        return self._unwrap(response)['vocabulary_id']

    def list_vocabularies(self,
                          prefix=None,
                          page_index: int = 0,
                          page_size: int = 10) -> List[dict]:
        """List all created vocabularies.

        Args:
            prefix: Optional custom prefix; when set, only identifiers
                with this prefix are returned.
            page_index: Page index to query.
            page_size: Page size to query.

        Returns:
            The list of vocabulary identifiers.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        payload = {
            'action': 'list_vocabulary',
            'page_index': page_index,
            'page_size': page_size,
        }
        if prefix:
            payload['prefix'] = prefix
        response = self.__call_with_input(payload)
        return self._unwrap(response)['vocabulary_list']

    def query_vocabulary(self, vocabulary_id: str) -> List[dict]:
        """Fetch the content of a vocabulary.

        Args:
            vocabulary_id: The vocabulary identifier.

        Returns:
            The vocabulary content.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'query_vocabulary',
            'vocabulary_id': vocabulary_id,
        })
        return self._unwrap(response)

    def update_vocabulary(self, vocabulary_id: str,
                          vocabulary: List[dict]) -> None:
        """Replace an existing vocabulary with a new one.

        Args:
            vocabulary_id: Identifier of the vocabulary to replace.
            vocabulary: The new vocabulary entries.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'update_vocabulary',
            'vocabulary_id': vocabulary_id,
            'vocabulary': vocabulary,
        })
        self._unwrap(response)

    def delete_vocabulary(self, vocabulary_id: str) -> None:
        """Delete a vocabulary.

        Args:
            vocabulary_id: Identifier of the vocabulary to delete.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'delete_vocabulary',
            'vocabulary_id': vocabulary_id,
        })
        self._unwrap(response)

    def get_last_request_id(self):
        """Return the request id of the last successful service call."""
        return self._last_request_id
|
||||
Reference in New Issue
Block a user