chore: add virtual environment to the repository

- Add the backend_service/venv virtual environment
- Includes all Python dependency packages
- Note: the virtual environment is about 393 MB and contains 12,655 files
2025-12-03 10:19:25 +08:00
parent a6c2027caa
commit c4f851d387
12655 changed files with 3009376 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from .asr_phrase_manager import AsrPhraseManager
from .recognition import Recognition, RecognitionCallback, RecognitionResult
from .transcription import Transcription
from .translation_recognizer import (TranscriptionResult, Translation,
TranslationRecognizerCallback,
TranslationRecognizerChat,
TranslationRecognizerRealtime,
TranslationRecognizerResultPack,
TranslationResult)
from .vocabulary import VocabularyService, VocabularyServiceException
__all__ = [
'Transcription', 'Recognition', 'RecognitionCallback', 'RecognitionResult',
'AsrPhraseManager', 'VocabularyServiceException', 'VocabularyService',
'TranslationRecognizerRealtime', 'TranslationRecognizerChat',
'TranslationRecognizerCallback', 'Translation', 'TranslationResult',
'TranscriptionResult', 'TranslationRecognizerResultPack'
]
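
For reference, a minimal import sketch against this public surface (a hedged assumption: this file appears to be the `__init__.py` of the SDK's ASR package, e.g. dashscope.audio.asr in the upstream SDK):

# Hypothetical usage; the package path is inferred from the exports above.
from dashscope.audio.asr import Recognition, RecognitionCallback, Transcription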

View File

@@ -0,0 +1,203 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
from http import HTTPStatus
from typing import Any, Dict
from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
from dashscope.client.base_api import BaseAsyncApi
from dashscope.common.error import InvalidParameter
from dashscope.common.logging import logger
from dashscope.customize.finetunes import FineTunes
class AsrPhraseManager(BaseAsyncApi):
"""Hot word management for speech recognition.
"""
@classmethod
def create_phrases(cls,
model: str,
phrases: Dict[str, Any],
training_type: str = 'compile_asr_phrase',
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""Create hot words.
Args:
model (str): The requested model.
phrases (Dict[str, Any]): A dictionary that contains phrases,
such as {'下一首':90,'上一首':90}.
training_type (str, `optional`): The training type,
'compile_asr_phrase' is default.
workspace (str): The dashscope workspace id.
Raises:
InvalidParameter: Parameter input is None or empty!
Returns:
DashScopeAPIResponse: The results of creating hot words.
"""
if phrases is None or len(phrases) == 0:
raise InvalidParameter('phrases is empty!')
if training_type is None or len(training_type) == 0:
raise InvalidParameter('training_type is empty!')
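        # The phrase API is exposed through the FineTunes endpoints; the
        # sub-path is swapped temporarily and restored after the call.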
original_ft_sub_path = FineTunes.SUB_PATH
FineTunes.SUB_PATH = 'fine-tunes'
response = FineTunes.call(model=model,
training_file_ids=[],
validation_file_ids=[],
mode=training_type,
hyper_parameters={'phrase_list': phrases},
workspace=workspace,
**kwargs)
FineTunes.SUB_PATH = original_ft_sub_path
if response.status_code != HTTPStatus.OK:
logger.error('Create phrase failed, ' + str(response))
return response
@classmethod
def update_phrases(cls,
model: str,
phrase_id: str,
phrases: Dict[str, Any],
training_type: str = 'compile_asr_phrase',
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""Update the hot words marked phrase_id.
Args:
model (str): The requested model.
phrase_id (str): The ID of phrases,
which created by create_phrases().
phrases (Dict[str, Any]): A dictionary that contains phrases,
such as {'暂停':90}.
training_type (str, `optional`):
The training type, 'compile_asr_phrase' is default.
workspace (str): The dashscope workspace id.
Raises:
InvalidParameter: Parameter input is None or empty!
Returns:
DashScopeAPIResponse: The results of updating hot words.
"""
if phrase_id is None or len(phrase_id) == 0:
raise InvalidParameter('phrase_id is empty!')
if phrases is None or len(phrases) == 0:
raise InvalidParameter('phrases is empty!')
if training_type is None or len(training_type) == 0:
raise InvalidParameter('training_type is empty!')
original_ft_sub_path = FineTunes.SUB_PATH
FineTunes.SUB_PATH = 'fine-tunes'
response = FineTunes.call(model=model,
training_file_ids=[],
validation_file_ids=[],
mode=training_type,
hyper_parameters={'phrase_list': phrases},
finetuned_output=phrase_id,
workspace=workspace,
**kwargs)
FineTunes.SUB_PATH = original_ft_sub_path
if response.status_code != HTTPStatus.OK:
logger.error('Update phrase failed, ' + str(response))
return response
@classmethod
def query_phrases(cls,
phrase_id: str,
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""Query the hot words by phrase_id.
Args:
            phrase_id (str): The ID of the phrases,
                as returned by create_phrases().
workspace (str): The dashscope workspace id.
Raises:
InvalidParameter: phrase_id input is None or empty!
Returns:
            DashScopeAPIResponse: The results of querying hot words.
"""
if phrase_id is None or len(phrase_id) == 0:
raise InvalidParameter('phrase_id is empty!')
original_ft_sub_path = FineTunes.SUB_PATH
FineTunes.SUB_PATH = 'fine-tunes/outputs'
response = FineTunes.get(job_id=phrase_id,
workspace=workspace,
**kwargs)
FineTunes.SUB_PATH = original_ft_sub_path
if response.status_code != HTTPStatus.OK:
logger.error('Query phrase failed, ' + str(response))
return response
@classmethod
def list_phrases(cls,
page: int = 1,
page_size: int = 10,
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""List all information of phrases.
Args:
page (int): Page number, greater than 0, default value 1.
page_size (int): The paging size, greater than 0
and less than or equal to 100, default value 10.
workspace (str): The dashscope workspace id.
Returns:
DashScopeAPIResponse: The results of listing hot words.
"""
original_ft_sub_path = FineTunes.SUB_PATH
FineTunes.SUB_PATH = 'fine-tunes/outputs'
response = FineTunes.list(page=page,
page_size=page_size,
workspace=workspace,
**kwargs)
FineTunes.SUB_PATH = original_ft_sub_path
if response.status_code != HTTPStatus.OK:
logger.error('List phrase failed, ' + str(response))
return response
@classmethod
def delete_phrases(cls,
phrase_id: str,
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""Delete the hot words by phrase_id.
Args:
            phrase_id (str): The ID of the phrases,
                as returned by create_phrases().
Raises:
InvalidParameter: phrase_id input is None or empty!
Returns:
DashScopeAPIResponse: The results of deleting hot words.
"""
if phrase_id is None or len(phrase_id) == 0:
raise InvalidParameter('phrase_id is empty!')
original_ft_sub_path = FineTunes.SUB_PATH
FineTunes.SUB_PATH = 'fine-tunes/outputs'
response = FineTunes.delete(job_id=phrase_id,
workspace=workspace,
**kwargs)
FineTunes.SUB_PATH = original_ft_sub_path
if response.status_code != HTTPStatus.OK:
logger.error('Delete phrase failed, ' + str(response))
return response
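
A hedged usage sketch for this manager; the model name and phrases are illustrative, and an API key is assumed to be configured (e.g. via the DASHSCOPE_API_KEY environment variable):

from http import HTTPStatus

# Hypothetical usage of AsrPhraseManager; all values are examples only.
response = AsrPhraseManager.create_phrases(model='paraformer-v1',
                                           phrases={'下一首': 90, '上一首': 90})
if response.status_code == HTTPStatus.OK:
    print(response.output)  # inspect the output for the generated phrase ID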

View File

@@ -0,0 +1,527 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import json
import os
import threading
import time
import uuid
from http import HTTPStatus
from queue import Queue
from threading import Timer
from typing import Any, Dict, List, Union
from dashscope.api_entities.dashscope_response import RecognitionResponse
from dashscope.client.base_api import BaseApi
from dashscope.common.constants import ApiProtocol
from dashscope.common.error import (InputDataRequired, InputRequired,
InvalidParameter, InvalidTask,
ModelRequired)
from dashscope.common.logging import logger
from dashscope.common.utils import _get_task_group_and_task
from dashscope.protocol.websocket import WebsocketStreamingMode
class RecognitionResult(RecognitionResponse):
"""The result set of speech recognition, including the single-sentence
recognition result returned by the callback mode, and all recognition
results in a synchronized manner.
"""
def __init__(self,
response: RecognitionResponse,
sentences: List[Any] = None,
usages: List[Any] = None):
self.status_code = response.status_code
self.request_id = response.request_id
self.code = response.code
self.message = response.message
self.usages = usages
if sentences is not None and len(sentences) > 0:
self.output = {'sentence': sentences}
else:
self.output = response.output
if self.usages is not None and len(
self.usages) > 0 and 'usage' in self.usages[-1]:
self.usage = self.usages[-1]['usage']
else:
self.usage = None
def __str__(self):
return json.dumps(RecognitionResponse.from_api_response(self),
ensure_ascii=False)
def get_sentence(self) -> Union[Dict[str, Any], List[Any]]:
"""The result of speech recognition.
"""
if self.output and 'sentence' in self.output:
return self.output['sentence']
else:
return None
def get_request_id(self) -> str:
"""The request_id of speech recognition.
"""
return self.request_id
def get_usage(self, sentence: Dict[str, Any]) -> Dict[str, Any]:
"""Get billing for the input sentence.
"""
if self.usages is not None:
if sentence is not None and 'end_time' in sentence and sentence[
'end_time'] is not None:
for usage in self.usages:
if usage['end_time'] == sentence['end_time']:
return usage['usage']
return None
@staticmethod
def is_sentence_end(sentence: Dict[str, Any]) -> bool:
"""Determine whether the speech recognition result is the end of a sentence.
This is a static method.
"""
if sentence is not None and 'end_time' in sentence and sentence[
'end_time'] is not None:
return True
else:
return False
class RecognitionCallback:
    """An interface that defines callback methods for receiving speech
    recognition results.
    Derive from this class and implement the methods to handle the results.
    """
def on_open(self) -> None:
pass
def on_complete(self) -> None:
pass
def on_error(self, result: RecognitionResult) -> None:
pass
def on_close(self) -> None:
pass
def on_event(self, result: RecognitionResult) -> None:
pass
class Recognition(BaseApi):
"""Speech recognition interface.
Args:
model (str): The requested model_id.
callback (RecognitionCallback): A callback that returns
speech recognition results.
format (str): The input audio format for speech recognition.
sample_rate (int): The input audio sample rate for speech recognition.
workspace (str): The dashscope workspace id.
**kwargs:
            phrase_id (str, `optional`): The ID of the phrase.
disfluency_removal_enabled(bool, `optional`): Filter mood words,
turned off by default.
diarization_enabled (bool, `optional`): Speech auto diarization,
turned off by default.
speaker_count (int, `optional`): The number of speakers.
timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
calibration, turned off by default.
special_word_filter(str, `optional`): Sensitive word filter.
audio_event_detection_enabled(bool, `optional`):
Audio event detection, turned off by default.
Raises:
InputRequired: Input is required.
"""
SILENCE_TIMEOUT_S = 23
def __init__(self,
model: str,
callback: RecognitionCallback,
format: str,
sample_rate: int,
workspace: str = None,
**kwargs):
if model is None:
raise ModelRequired('Model is required!')
if format is None:
raise InputRequired('format is required!')
if sample_rate is None:
raise InputRequired('sample_rate is required!')
self.model = model
self.format = format
self.sample_rate = sample_rate
# continuous recognition with start() or once recognition with call()
self._recognition_once = False
self._callback = callback
self._running = False
self._stream_data = Queue()
self._worker = None
self._silence_timer = None
self._kwargs = kwargs
self._workspace = workspace
self._start_stream_timestamp = -1
self._first_package_timestamp = -1
self._stop_stream_timestamp = -1
self._on_complete_timestamp = -1
self.request_id_confirmed = False
self.last_request_id = uuid.uuid4().hex
def __del__(self):
if self._running:
self._running = False
self._stream_data = Queue()
if self._worker is not None and self._worker.is_alive():
self._worker.join()
if self._silence_timer is not None and self._silence_timer.is_alive( # noqa E501
):
self._silence_timer.cancel()
self._silence_timer = None
if self._callback:
self._callback.on_close()
def __receive_worker(self):
"""Asynchronously, initiate a real-time speech recognition request and
obtain the result for parsing.
"""
responses = self.__launch_request()
for part in responses:
if part.status_code == HTTPStatus.OK:
                if len(part.output) == 0 or part.output.get('finished') is True:
self._on_complete_timestamp = time.time() * 1000
logger.debug('last package delay {}'.format(
self.get_last_package_delay()))
self._callback.on_complete()
else:
usage: Dict[str, Any] = None
usages: List[Any] = None
if 'sentence' in part.output:
if (self._first_package_timestamp < 0):
self._first_package_timestamp = time.time() * 1000
logger.debug('first package delay {}'.format(
self.get_first_package_delay()))
sentence = part.output['sentence']
                        if sentence.get('heartbeat') is True:
logger.debug('recv heartbeat')
continue
logger.debug(
'Recv Result [rid:{}]:{}, isEnd: {}'.format(
part.request_id, sentence,
RecognitionResult.is_sentence_end(sentence)))
if part.usage is not None:
usage = {
'end_time':
part.output['sentence']['end_time'],
'usage': part.usage
}
usages = [usage]
if self.request_id_confirmed is False and part.request_id is not None:
self.last_request_id = part.request_id
self.request_id_confirmed = True
self._callback.on_event(
RecognitionResult(
RecognitionResponse.from_api_response(part),
usages=usages))
else:
self._running = False
self._stream_data = Queue()
self._callback.on_error(
RecognitionResult(
RecognitionResponse.from_api_response(part)))
self._callback.on_close()
break
def __launch_request(self):
"""Initiate real-time speech recognition requests.
"""
resources_list: list = []
if self._phrase is not None and len(self._phrase) > 0:
item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
resources_list.append(item)
if len(resources_list) > 0:
self._kwargs['resources'] = resources_list
self._tidy_kwargs()
task_name, _ = _get_task_group_and_task(__name__)
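        # Duplex websocket: the generator below streams audio frames up while
        # recognition results stream back on the returned iterator.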
responses = super().call(model=self.model,
task_group='audio',
task=task_name,
function='recognition',
input=self._input_stream_cycle(),
api_protocol=ApiProtocol.WEBSOCKET,
ws_stream_mode=WebsocketStreamingMode.DUPLEX,
is_binary_input=True,
sample_rate=self.sample_rate,
format=self.format,
stream=True,
workspace=self._workspace,
pre_task_id=self.last_request_id,
**self._kwargs)
return responses
def start(self, phrase_id: str = None, **kwargs):
"""Real-time speech recognition in asynchronous mode.
Please call 'stop()' after you have completed recognition.
Args:
phrase_id (str, `optional`): The ID of phrase.
**kwargs:
disfluency_removal_enabled(bool, `optional`):
Filter mood words, turned off by default.
diarization_enabled (bool, `optional`):
Speech auto diarization, turned off by default.
speaker_count (int, `optional`): The number of speakers.
timestamp_alignment_enabled (bool, `optional`):
Timestamp-alignment calibration, turned off by default.
special_word_filter(str, `optional`): Sensitive word filter.
audio_event_detection_enabled(bool, `optional`):
Audio event detection, turned off by default.
Raises:
InvalidParameter: This interface cannot be called again
if it has already been started.
InvalidTask: Task create failed.
"""
assert self._callback is not None, 'Please set the callback to get the speech recognition result.' # noqa E501
if self._running:
raise InvalidParameter('Speech recognition has started.')
self._start_stream_timestamp = -1
self._first_package_timestamp = -1
self._stop_stream_timestamp = -1
self._on_complete_timestamp = -1
self._phrase = phrase_id
self._kwargs.update(**kwargs)
self._recognition_once = False
self._worker = threading.Thread(target=self.__receive_worker)
self._worker.start()
if self._worker.is_alive():
self._running = True
self._callback.on_open()
# If audio data is not received for 23 seconds, the timeout exits
self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
self._silence_stop_timer)
self._silence_timer.start()
else:
self._running = False
raise InvalidTask('Invalid task, task create failed.')
def call(self,
file: str,
phrase_id: str = None,
**kwargs) -> RecognitionResult:
"""Real-time speech recognition in synchronous mode.
Args:
file (str): The path to the local audio file.
phrase_id (str, `optional`): The ID of phrase.
**kwargs:
disfluency_removal_enabled(bool, `optional`):
Filter mood words, turned off by default.
diarization_enabled (bool, `optional`):
Speech auto diarization, turned off by default.
speaker_count (int, `optional`): The number of speakers.
timestamp_alignment_enabled (bool, `optional`):
Timestamp-alignment calibration, turned off by default.
special_word_filter(str, `optional`): Sensitive word filter.
audio_event_detection_enabled(bool, `optional`):
Audio event detection, turned off by default.
Raises:
InvalidParameter: This interface cannot be called again
if it has already been started.
InputDataRequired: The supplied file was empty.
Returns:
RecognitionResult: The result of speech recognition.
"""
self._start_stream_timestamp = time.time() * 1000
if self._running:
raise InvalidParameter('Speech recognition has been called.')
if os.path.exists(file):
if os.path.isdir(file):
raise IsADirectoryError('Is a directory: ' + file)
else:
raise FileNotFoundError('No such file or directory: ' + file)
self._recognition_once = True
self._stream_data = Queue()
self._phrase = phrase_id
self._kwargs.update(**kwargs)
error_flag: bool = False
sentences: List[Any] = []
usages: List[Any] = []
response: RecognitionResponse = None
result: RecognitionResult = None
        try:
            audio_data: bytes = None
            # Use a context manager so the file handle is closed even if an
            # error occurs while queueing audio data.
            with open(file, 'rb') as f:
                if os.path.getsize(file):
                    while True:
                        audio_data = f.read(12800)
                        if not audio_data:
                            break
                        else:
                            self._stream_data.put(audio_data)
                else:
                    raise InputDataRequired(
                        'The supplied file was empty (zero bytes long)')
            self._stop_stream_timestamp = time.time() * 1000
        except Exception as e:
            logger.error(e)
            raise e
if not self._stream_data.empty():
self._running = True
responses = self.__launch_request()
for part in responses:
if part.status_code == HTTPStatus.OK:
if 'sentence' in part.output:
if (self._first_package_timestamp < 0):
self._first_package_timestamp = time.time() * 1000
logger.debug('first package delay {}'.format(
self._first_package_timestamp -
self._start_stream_timestamp))
sentence = part.output['sentence']
logger.debug(
'Recv Result [rid:{}]:{}, isEnd: {}'.format(
part.request_id, sentence,
RecognitionResult.is_sentence_end(sentence)))
if RecognitionResult.is_sentence_end(sentence):
sentences.append(sentence)
if part.usage is not None:
usage = {
'end_time':
part.output['sentence']['end_time'],
'usage': part.usage
}
usages.append(usage)
response = RecognitionResponse.from_api_response(part)
else:
response = RecognitionResponse.from_api_response(part)
logger.error(response)
error_flag = True
break
self._on_complete_timestamp = time.time() * 1000
logger.debug('last package delay {}'.format(
self.get_last_package_delay()))
if error_flag:
result = RecognitionResult(response)
else:
result = RecognitionResult(response, sentences, usages)
self._stream_data = Queue()
self._recognition_once = False
self._running = False
return result
def stop(self):
"""End asynchronous speech recognition.
Raises:
InvalidParameter: Cannot stop an uninitiated recognition.
"""
if self._running is False:
raise InvalidParameter('Speech recognition has stopped.')
self._stop_stream_timestamp = time.time() * 1000
self._running = False
if self._worker is not None and self._worker.is_alive():
self._worker.join()
self._stream_data = Queue()
if self._silence_timer is not None and self._silence_timer.is_alive():
self._silence_timer.cancel()
self._silence_timer = None
if self._callback:
self._callback.on_close()
def send_audio_frame(self, buffer: bytes):
"""Push speech recognition.
Raises:
InvalidParameter: Cannot send data to an uninitiated recognition.
"""
if self._running is False:
raise InvalidParameter('Speech recognition has stopped.')
if (self._start_stream_timestamp < 0):
self._start_stream_timestamp = time.time() * 1000
logger.debug('send_audio_frame: {}'.format(len(buffer)))
self._stream_data.put(buffer)
def _tidy_kwargs(self):
for k in self._kwargs.copy():
if self._kwargs[k] is None:
self._kwargs.pop(k, None)
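    # Generator that feeds the websocket: it polls the queue for audio frames,
    # resets the silence timer whenever data arrives, and drains any remaining
    # frames after stop() is called.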
def _input_stream_cycle(self):
while self._running:
while self._stream_data.empty():
if self._running:
time.sleep(0.01)
continue
else:
break
# Reset silence_timer when getting stream.
if self._silence_timer is not None and self._silence_timer.is_alive( # noqa E501
):
self._silence_timer.cancel()
self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
self._silence_stop_timer)
self._silence_timer.start()
while not self._stream_data.empty():
frame = self._stream_data.get()
yield bytes(frame)
if self._recognition_once:
self._running = False
# drain all audio data when invoking stop().
if self._recognition_once is False:
while not self._stream_data.empty():
frame = self._stream_data.get()
yield bytes(frame)
def _silence_stop_timer(self):
"""If audio data is not received for a long time, exit worker.
"""
self._running = False
if self._silence_timer is not None and self._silence_timer.is_alive():
self._silence_timer.cancel()
self._silence_timer = None
if self._worker is not None and self._worker.is_alive():
self._worker.join()
self._stream_data = Queue()
    def get_first_package_delay(self):
        """First package delay: the time between starting to send audio and
        receiving the first result package.
        """
        return self._first_package_timestamp - self._start_stream_timestamp
    def get_last_package_delay(self):
        """Last package delay: the time between the end of audio sending and
        receipt of the last result package.
        """
        return self._on_complete_timestamp - self._stop_stream_timestamp
    # Get the request id of the last task.
def get_last_request_id(self):
return self.last_request_id
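
A hedged end-to-end sketch of the streaming (callback) mode; the model name, audio format, sample rate, and file path are illustrative, and an API key is assumed (e.g. DASHSCOPE_API_KEY):

# Hypothetical streaming usage; all parameter values are examples only.
class PrintingCallback(RecognitionCallback):
    def on_event(self, result: RecognitionResult) -> None:
        sentence = result.get_sentence()
        if sentence and RecognitionResult.is_sentence_end(sentence):
            print('final sentence:', sentence)

recognizer = Recognition(model='paraformer-realtime-v1',
                         callback=PrintingCallback(),
                         format='pcm',
                         sample_rate=16000)
recognizer.start()
with open('sample.pcm', 'rb') as audio:  # illustrative path
    while chunk := audio.read(3200):
        recognizer.send_audio_frame(chunk)
recognizer.stop()

For one-shot recognition of a local file, the synchronous call() method on a fresh instance returns a RecognitionResult instead.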

View File

@@ -0,0 +1,231 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import asyncio
import time
from typing import List, Union
import aiohttp
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
TranscriptionResponse)
from dashscope.client.base_api import BaseAsyncApi
from dashscope.common.constants import ApiProtocol, HTTPMethod
from dashscope.common.logging import logger
from dashscope.common.utils import _get_task_group_and_task
class Transcription(BaseAsyncApi):
"""API for File Transcription models.
"""
MAX_QUERY_TRY_COUNT = 3
class Models:
paraformer_v1 = 'paraformer-v1'
paraformer_8k_v1 = 'paraformer-8k-v1'
paraformer_mtl_v1 = 'paraformer-mtl-v1'
@classmethod
def call(cls,
model: str,
file_urls: List[str],
phrase_id: str = None,
api_key: str = None,
workspace: str = None,
**kwargs) -> TranscriptionResponse:
"""Transcribe the given files synchronously.
Args:
model (str): The requested model_id.
file_urls (List[str]): List of stored URLs.
phrase_id (str, `optional`): The ID of phrase.
workspace (str): The dashscope workspace id.
**kwargs:
                channel_id (List[int], optional):
                    The selected channel_id of the audio file.
disfluency_removal_enabled(bool, `optional`):
Filter mood words, turned off by default.
diarization_enabled (bool, `optional`):
Speech auto diarization, turned off by default.
speaker_count (int, `optional`): The number of speakers.
timestamp_alignment_enabled (bool, `optional`):
Timestamp-alignment calibration, turned off by default.
special_word_filter(str, `optional`): Sensitive word filter.
audio_event_detection_enabled(bool, `optional`):
Audio event detection, turned off by default.
Returns:
TranscriptionResponse: The result of batch transcription.
"""
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
kwargs = cls._tidy_kwargs(**kwargs)
response = super().call(model,
file_urls,
api_key=api_key,
workspace=workspace,
**kwargs)
return TranscriptionResponse.from_api_response(response)
@classmethod
def async_call(cls,
model: str,
file_urls: List[str],
phrase_id: str = None,
api_key: str = None,
workspace: str = None,
**kwargs) -> TranscriptionResponse:
"""Transcribe the given files asynchronously,
return the status of task submission for querying results subsequently.
Args:
            model (str): The requested model, such as paraformer-v1.
file_urls (List[str]): List of stored URLs.
phrase_id (str, `optional`): The ID of phrase.
workspace (str): The dashscope workspace id.
**kwargs:
                channel_id (List[int], optional):
                    The selected channel_id of the audio file.
disfluency_removal_enabled(bool, `optional`):
Filter mood words, turned off by default.
diarization_enabled (bool, `optional`):
Speech auto diarization, turned off by default.
speaker_count (int, `optional`): The number of speakers.
timestamp_alignment_enabled (bool, `optional`):
Timestamp-alignment calibration, turned off by default.
special_word_filter(str, `optional`): Sensitive word filter.
audio_event_detection_enabled(bool, `optional`):
Audio event detection, turned off by default.
Returns:
TranscriptionResponse: The response including task_id.
"""
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
kwargs = cls._tidy_kwargs(**kwargs)
response = cls._launch_request(model,
file_urls,
api_key=api_key,
workspace=workspace,
**kwargs)
return TranscriptionResponse.from_api_response(response)
@classmethod
def fetch(cls,
task: Union[str, TranscriptionResponse],
api_key: str = None,
workspace: str = None,
**kwargs) -> TranscriptionResponse:
"""Fetch the status of task, including results of batch transcription when task_status is SUCCEEDED. # noqa: E501
Args:
task (Union[str, TranscriptionResponse]): The task_id or
response including task_id returned from async_call().
workspace (str): The dashscope workspace id.
Returns:
TranscriptionResponse: The status of task_id,
including results of batch transcription when task_status is SUCCEEDED.
"""
        try_count: int = 0
        while True:
            try:
                response = super().fetch(task,
                                         api_key=api_key,
                                         workspace=workspace,
                                         **kwargs)
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
                    time.sleep(2)
                    continue
                # Retries exhausted: re-raise rather than fall through with an
                # undefined response.
                raise
            break
return TranscriptionResponse.from_api_response(response)
@classmethod
def wait(cls,
task: Union[str, TranscriptionResponse],
api_key: str = None,
workspace: str = None,
**kwargs) -> TranscriptionResponse:
"""Poll task until the final results of transcription is obtained.
Args:
task (Union[str, TranscriptionResponse]): The task_id or
response including task_id returned from async_call().
workspace (str): The dashscope workspace id.
Returns:
TranscriptionResponse: The result of batch transcription.
"""
response = super().wait(task,
api_key=api_key,
workspace=workspace,
**kwargs)
return TranscriptionResponse.from_api_response(response)
@classmethod
def _launch_request(cls,
model: str,
files: List[str],
api_key: str = None,
workspace: str = None,
**kwargs) -> DashScopeAPIResponse:
"""Submit transcribe request.
Args:
            model (str): The requested model, such as paraformer-v1.
files (List[str]): List of stored URLs.
workspace (str): The dashscope workspace id.
Returns:
DashScopeAPIResponse: The result of task submission.
"""
task_name, function = _get_task_group_and_task(__name__)
try_count: int = 0
while True:
try:
response = super().async_call(model=model,
task_group='audio',
task=task_name,
function=function,
input={'file_urls': files},
api_protocol=ApiProtocol.HTTP,
http_method=HTTPMethod.POST,
api_key=api_key,
workspace=workspace,
**kwargs)
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
                    time.sleep(2)
                    continue
                # Retries exhausted: re-raise rather than return an undefined
                # response.
                raise
            break
return response
@classmethod
def _fill_resource_id(cls, phrase_id: str, **kwargs):
resources_list: list = []
if phrase_id is not None and len(phrase_id) > 0:
item = {'resource_id': phrase_id, 'resource_type': 'asr_phrase'}
resources_list.append(item)
if len(resources_list) > 0:
kwargs['resources'] = resources_list
return kwargs
@classmethod
def _tidy_kwargs(cls, **kwargs):
for k in kwargs.copy():
if kwargs[k] is None:
kwargs.pop(k, None)
return kwargs
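
A hedged batch-transcription sketch; the model name and file URL are illustrative, and an API key is assumed (e.g. DASHSCOPE_API_KEY):

# Hypothetical usage; async_call() submits the task, wait() polls to completion.
task = Transcription.async_call(model='paraformer-v1',
                                file_urls=['https://example.com/sample.wav'])
result = Transcription.wait(task)
print(result.output)  # transcription results when task_status is SUCCEEDED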

View File

@@ -0,0 +1,177 @@
# Copyright (c) Alibaba, Inc. and its affiliates.
import asyncio
import time
from typing import List
import aiohttp
from dashscope.client.base_api import BaseApi
from dashscope.common.constants import ApiProtocol, HTTPMethod
from dashscope.common.logging import logger
class VocabularyServiceException(Exception):
def __init__(self, request_id: str, status_code: int, code: str,
error_message: str) -> None:
self._request_id = request_id
self._status_code = status_code
self._code = code
self._error_message = error_message
def __str__(self):
return f'Request: {self._request_id}, Status Code: {self._status_code}, Code: {self._code}, Error Message: {self._error_message}'
class VocabularyService(BaseApi):
    '''
    API for the ASR vocabulary (hot-word) service.
    '''
MAX_QUERY_TRY_COUNT = 3
def __init__(self,
api_key=None,
workspace=None,
model=None,
**kwargs) -> None:
super().__init__()
self._api_key = api_key
self._workspace = workspace
self._kwargs = kwargs
self._last_request_id = None
self.model = model
if self.model is None:
self.model = 'speech-biasing'
def __call_with_input(self, input):
        try_count = 0
        while True:
            try:
                response = super().call(model=self.model,
                                        task_group='audio',
                                        task='asr',
                                        function='customization',
                                        input=input,
                                        api_protocol=ApiProtocol.HTTP,
                                        http_method=HTTPMethod.POST,
                                        api_key=self._api_key,
                                        workspace=self._workspace,
                                        **self._kwargs)
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count <= VocabularyService.MAX_QUERY_TRY_COUNT:
                    time.sleep(2)
                    continue
                # Retries exhausted: re-raise rather than return an undefined
                # response.
                raise
            break
        logger.debug('>>>>recv %s', response)
return response
def create_vocabulary(self, target_model: str, prefix: str,
vocabulary: List[dict]) -> str:
        '''
        Create a vocabulary (hot-word list).
        param: target_model The speech recognition model version that the
               vocabulary applies to.
        param: prefix Custom vocabulary prefix; digits and lowercase letters
               only, fewer than ten characters.
        param: vocabulary The vocabulary, as a list of dicts.
        return: The vocabulary identifier vocabulary_id.
        '''
response = self.__call_with_input(input={
'action': 'create_vocabulary',
'target_model': target_model,
'prefix': prefix,
'vocabulary': vocabulary,
}, )
if response.status_code == 200:
self._last_request_id = response.request_id
return response.output['vocabulary_id']
else:
raise VocabularyServiceException(response.request_id, response.status_code,
response.code, response.message)
def list_vocabularies(self,
prefix=None,
page_index: int = 0,
page_size: int = 10) -> List[dict]:
        '''
        List all created vocabularies.
        param: prefix Custom prefix; if set, only vocabulary identifiers with
               this prefix are returned.
        param: page_index The page index to query.
        param: page_size The page size to query.
        return: A list of vocabulary identifiers.
        '''
if prefix:
response = self.__call_with_input(input={
'action': 'list_vocabulary',
'prefix': prefix,
'page_index': page_index,
'page_size': page_size,
}, )
else:
response = self.__call_with_input(input={
'action': 'list_vocabulary',
'page_index': page_index,
'page_size': page_size,
}, )
if response.status_code == 200:
self._last_request_id = response.request_id
return response.output['vocabulary_list']
else:
raise VocabularyServiceException(response.request_id, response.status_code,
response.code, response.message)
def query_vocabulary(self, vocabulary_id: str) -> List[dict]:
        '''
        Get the content of a vocabulary.
        param: vocabulary_id The vocabulary identifier.
        return: The vocabulary.
        '''
response = self.__call_with_input(input={
'action': 'query_vocabulary',
'vocabulary_id': vocabulary_id,
}, )
if response.status_code == 200:
self._last_request_id = response.request_id
return response.output
else:
raise VocabularyServiceException(response.request_id, response.status_code,
response.code, response.message)
def update_vocabulary(self, vocabulary_id: str,
vocabulary: List[dict]) -> None:
        '''
        Replace an existing vocabulary with a new one.
        param: vocabulary_id The identifier of the vocabulary to replace.
        param: vocabulary The new vocabulary.
        '''
response = self.__call_with_input(input={
'action': 'update_vocabulary',
'vocabulary_id': vocabulary_id,
'vocabulary': vocabulary,
}, )
if response.status_code == 200:
self._last_request_id = response.request_id
return
else:
raise VocabularyServiceException(response.request_id, response.status_code,
response.code, response.message)
def delete_vocabulary(self, vocabulary_id: str) -> None:
        '''
        Delete a vocabulary.
        param: vocabulary_id The identifier of the vocabulary to delete.
        '''
response = self.__call_with_input(input={
'action': 'delete_vocabulary',
'vocabulary_id': vocabulary_id,
}, )
if response.status_code == 200:
self._last_request_id = response.request_id
return
else:
raise VocabularyServiceException(response.request_id, response.status_code,
response.code, response.message)
def get_last_request_id(self):
return self._last_request_id
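
A hedged usage sketch for the vocabulary service; the target model, prefix, and the vocabulary entry schema are assumptions, and an API key is assumed (e.g. DASHSCOPE_API_KEY):

# Hypothetical usage; the entry fields ('text', 'weight', 'lang') are assumed.
service = VocabularyService()
vocab_id = service.create_vocabulary(
    target_model='paraformer-v1',  # assumed target model name
    prefix='demo',
    vocabulary=[{'text': '下一首', 'weight': 4, 'lang': 'zh'}])
print(service.query_vocabulary(vocab_id))
service.delete_vocabulary(vocab_id)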