chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境 - 包含所有Python依赖包 - 注意:虚拟环境约393MB,包含12655个文件
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from .asr_phrase_manager import AsrPhraseManager
|
||||
from .recognition import Recognition, RecognitionCallback, RecognitionResult
|
||||
from .transcription import Transcription
|
||||
from .translation_recognizer import (TranscriptionResult, Translation,
|
||||
TranslationRecognizerCallback,
|
||||
TranslationRecognizerChat,
|
||||
TranslationRecognizerRealtime,
|
||||
TranslationRecognizerResultPack,
|
||||
TranslationResult)
|
||||
from .vocabulary import VocabularyService, VocabularyServiceException
|
||||
|
||||
__all__ = [
|
||||
'Transcription', 'Recognition', 'RecognitionCallback', 'RecognitionResult',
|
||||
'AsrPhraseManager', 'VocabularyServiceException', 'VocabularyService',
|
||||
'TranslationRecognizerRealtime', 'TranslationRecognizerChat',
|
||||
'TranslationRecognizerCallback', 'Translation', 'TranslationResult',
|
||||
'TranscriptionResult', 'TranslationRecognizerResultPack'
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,203 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
from http import HTTPStatus
|
||||
from typing import Any, Dict
|
||||
|
||||
from dashscope.api_entities.dashscope_response import DashScopeAPIResponse
|
||||
from dashscope.client.base_api import BaseAsyncApi
|
||||
from dashscope.common.error import InvalidParameter
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.customize.finetunes import FineTunes
|
||||
|
||||
|
||||
class AsrPhraseManager(BaseAsyncApi):
    """Hot word (phrase) management for speech recognition.

    Phrase CRUD is carried over the fine-tune API endpoints: every method
    temporarily redirects ``FineTunes.SUB_PATH`` to the appropriate
    sub-path and restores it afterwards.

    NOTE(review): mutating the class attribute ``FineTunes.SUB_PATH`` is
    not thread-safe; concurrent callers of ``FineTunes`` may observe the
    temporary value. The swap is now exception-safe (restored via
    ``finally``), which the original code was not.
    """
    @classmethod
    def _call_with_sub_path(cls, sub_path, func, **call_kwargs):
        """Invoke ``func(**call_kwargs)`` with ``FineTunes.SUB_PATH``
        temporarily set to ``sub_path``.

        The original value is restored even if ``func`` raises, so a
        failing request can no longer leave ``FineTunes`` pointing at the
        phrase endpoints.
        """
        original_ft_sub_path = FineTunes.SUB_PATH
        FineTunes.SUB_PATH = sub_path
        try:
            return func(**call_kwargs)
        finally:
            FineTunes.SUB_PATH = original_ft_sub_path

    @classmethod
    def create_phrases(cls,
                       model: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Create hot words.

        Args:
            model (str): The requested model.
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'下一首':90,'上一首':90}.
            training_type (str, `optional`): The training type,
                'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of creating hot words.
        """
        if phrases is None or len(phrases) == 0:
            raise InvalidParameter('phrases is empty!')
        if training_type is None or len(training_type) == 0:
            raise InvalidParameter('training_type is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes',
            FineTunes.call,
            model=model,
            training_file_ids=[],
            validation_file_ids=[],
            mode=training_type,
            hyper_parameters={'phrase_list': phrases},
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Create phrase failed, ' + str(response))

        return response

    @classmethod
    def update_phrases(cls,
                       model: str,
                       phrase_id: str,
                       phrases: Dict[str, Any],
                       training_type: str = 'compile_asr_phrase',
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Update the hot words marked phrase_id.

        Args:
            model (str): The requested model.
            phrase_id (str): The ID of phrases,
                which created by create_phrases().
            phrases (Dict[str, Any]): A dictionary that contains phrases,
                such as {'暂停':90}.
            training_type (str, `optional`):
                The training type, 'compile_asr_phrase' is default.
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: Parameter input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of updating hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')
        if phrases is None or len(phrases) == 0:
            raise InvalidParameter('phrases is empty!')
        if training_type is None or len(training_type) == 0:
            raise InvalidParameter('training_type is empty!')

        # `finetuned_output` routes the update onto the existing phrase set.
        response = cls._call_with_sub_path(
            'fine-tunes',
            FineTunes.call,
            model=model,
            training_file_ids=[],
            validation_file_ids=[],
            mode=training_type,
            hyper_parameters={'phrase_list': phrases},
            finetuned_output=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Update phrase failed, ' + str(response))

        return response

    @classmethod
    def query_phrases(cls,
                      phrase_id: str,
                      workspace: str = None,
                      **kwargs) -> DashScopeAPIResponse:
        """Query the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which created by create_phrases().
            workspace (str): The dashscope workspace id.

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of querying hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.get,
            job_id=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Query phrase failed, ' + str(response))

        return response

    @classmethod
    def list_phrases(cls,
                     page: int = 1,
                     page_size: int = 10,
                     workspace: str = None,
                     **kwargs) -> DashScopeAPIResponse:
        """List all information of phrases.

        Args:
            page (int): Page number, greater than 0, default value 1.
            page_size (int): The paging size, greater than 0
                and less than or equal to 100, default value 10.
            workspace (str): The dashscope workspace id.

        Returns:
            DashScopeAPIResponse: The results of listing hot words.
        """
        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.list,
            page=page,
            page_size=page_size,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('List phrase failed, ' + str(response))

        return response

    @classmethod
    def delete_phrases(cls,
                       phrase_id: str,
                       workspace: str = None,
                       **kwargs) -> DashScopeAPIResponse:
        """Delete the hot words by phrase_id.

        Args:
            phrase_id (str): The ID of phrases,
                which created by create_phrases().

        Raises:
            InvalidParameter: phrase_id input is None or empty!

        Returns:
            DashScopeAPIResponse: The results of deleting hot words.
        """
        if phrase_id is None or len(phrase_id) == 0:
            raise InvalidParameter('phrase_id is empty!')

        response = cls._call_with_sub_path(
            'fine-tunes/outputs',
            FineTunes.delete,
            job_id=phrase_id,
            workspace=workspace,
            **kwargs)

        if response.status_code != HTTPStatus.OK:
            logger.error('Delete phrase failed, ' + str(response))

        return response
|
||||
@@ -0,0 +1,527 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from http import HTTPStatus
|
||||
from queue import Queue
|
||||
from threading import Timer
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from dashscope.api_entities.dashscope_response import RecognitionResponse
|
||||
from dashscope.client.base_api import BaseApi
|
||||
from dashscope.common.constants import ApiProtocol
|
||||
from dashscope.common.error import (InputDataRequired, InputRequired,
|
||||
InvalidParameter, InvalidTask,
|
||||
ModelRequired)
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.common.utils import _get_task_group_and_task
|
||||
from dashscope.protocol.websocket import WebsocketStreamingMode
|
||||
|
||||
|
||||
class RecognitionResult(RecognitionResponse):
    """The result set of speech recognition: either the single-sentence
    result delivered through the callback interface, or the collected
    results of a synchronous call.
    """
    def __init__(self,
                 response: RecognitionResponse,
                 sentences: List[Any] = None,
                 usages: List[Any] = None):
        # Mirror the transport-level fields of the raw response.
        self.status_code = response.status_code
        self.request_id = response.request_id
        self.code = response.code
        self.message = response.message
        self.usages = usages
        # Prefer explicitly collected sentences over the raw output payload.
        self.output = {'sentence': sentences} if sentences else response.output
        # The most recent usage record (when present) becomes the headline
        # usage of this result.
        newest = self.usages[-1] if self.usages else None
        self.usage = newest['usage'] if newest is not None and 'usage' in newest else None

    def __str__(self):
        as_response = RecognitionResponse.from_api_response(self)
        return json.dumps(as_response, ensure_ascii=False)

    def get_sentence(self) -> Union[Dict[str, Any], List[Any]]:
        """Return the recognized sentence payload, or None if absent."""
        if not self.output or 'sentence' not in self.output:
            return None
        return self.output['sentence']

    def get_request_id(self) -> str:
        """Return the request_id of this speech recognition result."""
        return self.request_id

    def get_usage(self, sentence: Dict[str, Any]) -> Dict[str, Any]:
        """Return the billing record matching *sentence* (by end_time),
        or None when unavailable."""
        if self.usages is None:
            return None
        if sentence is None or sentence.get('end_time') is None:
            return None
        for record in self.usages:
            if record['end_time'] == sentence['end_time']:
                return record['usage']
        return None

    @staticmethod
    def is_sentence_end(sentence: Dict[str, Any]) -> bool:
        """Return True when *sentence* carries a final end_time, i.e. the
        recognition result is a complete sentence. Static method."""
        return sentence is not None and sentence.get('end_time') is not None
|
||||
|
||||
|
||||
class RecognitionCallback():
    """An interface that defines callback methods for getting speech recognition results. # noqa E501
    Derive from this class and implement its function to provide your own data.
    """
    def on_open(self) -> None:
        """Invoked once the recognition session has been opened."""
        pass

    def on_complete(self) -> None:
        """Invoked when the recognition stream finishes normally."""
        pass

    def on_error(self, result: RecognitionResult) -> None:
        """Invoked when recognition fails; *result* carries the error."""
        pass

    def on_close(self) -> None:
        """Invoked when the recognition session is closed."""
        pass

    def on_event(self, result: RecognitionResult) -> None:
        """Invoked for each intermediate/final recognition result."""
        pass
|
||||
|
||||
|
||||
class Recognition(BaseApi):
    """Speech recognition interface.

    Args:
        model (str): The requested model_id.
        callback (RecognitionCallback): A callback that returns
            speech recognition results.
        format (str): The input audio format for speech recognition.
        sample_rate (int): The input audio sample rate for speech recognition.
        workspace (str): The dashscope workspace id.

        **kwargs:
            phrase_id (list, `optional`): The ID of phrase.
            disfluency_removal_enabled(bool, `optional`): Filter mood words,
                turned off by default.
            diarization_enabled (bool, `optional`): Speech auto diarization,
                turned off by default.
            speaker_count (int, `optional`): The number of speakers.
            timestamp_alignment_enabled (bool, `optional`): Timestamp-alignment
                calibration, turned off by default.
            special_word_filter(str, `optional`): Sensitive word filter.
            audio_event_detection_enabled(bool, `optional`):
                Audio event detection, turned off by default.

    Raises:
        InputRequired: Input is required.
    """

    # Abort the session when no audio is received for this many seconds.
    SILENCE_TIMEOUT_S = 23

    def __init__(self,
                 model: str,
                 callback: RecognitionCallback,
                 format: str,
                 sample_rate: int,
                 workspace: str = None,
                 **kwargs):
        if model is None:
            raise ModelRequired('Model is required!')
        if format is None:
            raise InputRequired('format is required!')
        if sample_rate is None:
            raise InputRequired('sample_rate is required!')

        self.model = model
        self.format = format
        self.sample_rate = sample_rate
        # continuous recognition with start() or once recognition with call()
        self._recognition_once = False
        self._callback = callback
        self._running = False
        # Audio frames queued for the websocket sender generator.
        self._stream_data = Queue()
        self._worker = None
        self._silence_timer = None
        self._kwargs = kwargs
        self._workspace = workspace
        # Millisecond timestamps; -1 means "not recorded yet". Used by
        # get_first_package_delay()/get_last_package_delay().
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        # A locally generated id is used until the server confirms the real
        # request id in the first response.
        self.request_id_confirmed = False
        self.last_request_id = uuid.uuid4().hex

    def __del__(self):
        # Best-effort cleanup in case the user never called stop().
        if self._running:
            self._running = False
            self._stream_data = Queue()
            if self._worker is not None and self._worker.is_alive():
                self._worker.join()
            if self._silence_timer is not None and self._silence_timer.is_alive(  # noqa E501
            ):
                self._silence_timer.cancel()
                self._silence_timer = None
            if self._callback:
                self._callback.on_close()

    def __receive_worker(self):
        """Asynchronously, initiate a real-time speech recognition request and
        obtain the result for parsing.
        """
        responses = self.__launch_request()
        for part in responses:
            if part.status_code == HTTPStatus.OK:
                # Empty output or an explicit 'finished' flag marks the end
                # of the recognition stream.
                if len(part.output) == 0 or ('finished' in part.output and part.output['finished'] == True):
                    self._on_complete_timestamp = time.time() * 1000
                    logger.debug('last package delay {}'.format(
                        self.get_last_package_delay()))
                    self._callback.on_complete()
                else:
                    usage: Dict[str, Any] = None
                    usages: List[Any] = None
                    if 'sentence' in part.output:
                        # Record arrival time of the very first result
                        # package for latency reporting.
                        if (self._first_package_timestamp < 0):
                            self._first_package_timestamp = time.time() * 1000
                            logger.debug('first package delay {}'.format(
                                self.get_first_package_delay()))
                        sentence = part.output['sentence']
                        # Heartbeat packets carry no recognition payload.
                        if 'heartbeat' in sentence and sentence['heartbeat'] == True:
                            logger.debug('recv heartbeat')
                            continue
                        logger.debug(
                            'Recv Result [rid:{}]:{}, isEnd: {}'.format(
                                part.request_id, sentence,
                                RecognitionResult.is_sentence_end(sentence)))
                        if part.usage is not None:
                            usage = {
                                'end_time':
                                part.output['sentence']['end_time'],
                                'usage': part.usage
                            }
                            usages = [usage]
                    # Remember the server-assigned request id from the first
                    # response that carries one.
                    if self.request_id_confirmed is False and part.request_id is not None:
                        self.last_request_id = part.request_id
                        self.request_id_confirmed = True

                    self._callback.on_event(
                        RecognitionResult(
                            RecognitionResponse.from_api_response(part),
                            usages=usages))
            else:
                # Transport/service error: stop streaming, drop queued audio,
                # notify the callback and close the session.
                self._running = False
                self._stream_data = Queue()
                self._callback.on_error(
                    RecognitionResult(
                        RecognitionResponse.from_api_response(part)))
                self._callback.on_close()
                break

    def __launch_request(self):
        """Initiate real-time speech recognition requests.
        """
        # Attach the hot-word phrase resource when one was supplied.
        resources_list: list = []
        if self._phrase is not None and len(self._phrase) > 0:
            item = {'resource_id': self._phrase, 'resource_type': 'asr_phrase'}
            resources_list.append(item)

        if len(resources_list) > 0:
            self._kwargs['resources'] = resources_list

        self._tidy_kwargs()
        task_name, _ = _get_task_group_and_task(__name__)
        # Duplex websocket call: audio frames are pulled lazily from
        # _input_stream_cycle() while results stream back.
        responses = super().call(model=self.model,
                                 task_group='audio',
                                 task=task_name,
                                 function='recognition',
                                 input=self._input_stream_cycle(),
                                 api_protocol=ApiProtocol.WEBSOCKET,
                                 ws_stream_mode=WebsocketStreamingMode.DUPLEX,
                                 is_binary_input=True,
                                 sample_rate=self.sample_rate,
                                 format=self.format,
                                 stream=True,
                                 workspace=self._workspace,
                                 pre_task_id=self.last_request_id,
                                 **self._kwargs)
        return responses

    def start(self, phrase_id: str = None, **kwargs):
        """Real-time speech recognition in asynchronous mode.
        Please call 'stop()' after you have completed recognition.

        Args:
            phrase_id (str, `optional`): The ID of phrase.

            **kwargs:
                disfluency_removal_enabled(bool, `optional`):
                    Filter mood words, turned off by default.
                diarization_enabled (bool, `optional`):
                    Speech auto diarization, turned off by default.
                speaker_count (int, `optional`): The number of speakers.
                timestamp_alignment_enabled (bool, `optional`):
                    Timestamp-alignment calibration, turned off by default.
                special_word_filter(str, `optional`): Sensitive word filter.
                audio_event_detection_enabled(bool, `optional`):
                    Audio event detection, turned off by default.

        Raises:
            InvalidParameter: This interface cannot be called again
                if it has already been started.
            InvalidTask: Task create failed.
        """
        assert self._callback is not None, 'Please set the callback to get the speech recognition result.'  # noqa E501

        if self._running:
            raise InvalidParameter('Speech recognition has started.')

        # Reset latency bookkeeping for the new session.
        self._start_stream_timestamp = -1
        self._first_package_timestamp = -1
        self._stop_stream_timestamp = -1
        self._on_complete_timestamp = -1
        self._phrase = phrase_id
        self._kwargs.update(**kwargs)
        self._recognition_once = False
        self._worker = threading.Thread(target=self.__receive_worker)
        self._worker.start()
        if self._worker.is_alive():
            self._running = True
            self._callback.on_open()

            # If audio data is not received for 23 seconds, the timeout exits
            self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
                                        self._silence_stop_timer)
            self._silence_timer.start()
        else:
            self._running = False
            raise InvalidTask('Invalid task, task create failed.')

    def call(self,
             file: str,
             phrase_id: str = None,
             **kwargs) -> RecognitionResult:
        """Real-time speech recognition in synchronous mode.

        Args:
            file (str): The path to the local audio file.
            phrase_id (str, `optional`): The ID of phrase.

            **kwargs:
                disfluency_removal_enabled(bool, `optional`):
                    Filter mood words, turned off by default.
                diarization_enabled (bool, `optional`):
                    Speech auto diarization, turned off by default.
                speaker_count (int, `optional`): The number of speakers.
                timestamp_alignment_enabled (bool, `optional`):
                    Timestamp-alignment calibration, turned off by default.
                special_word_filter(str, `optional`): Sensitive word filter.
                audio_event_detection_enabled(bool, `optional`):
                    Audio event detection, turned off by default.

        Raises:
            InvalidParameter: This interface cannot be called again
                if it has already been started.
            InputDataRequired: The supplied file was empty.

        Returns:
            RecognitionResult: The result of speech recognition.
        """
        self._start_stream_timestamp = time.time() * 1000
        if self._running:
            raise InvalidParameter('Speech recognition has been called.')

        if os.path.exists(file):
            if os.path.isdir(file):
                raise IsADirectoryError('Is a directory: ' + file)
        else:
            raise FileNotFoundError('No such file or directory: ' + file)

        self._recognition_once = True
        self._stream_data = Queue()
        self._phrase = phrase_id
        self._kwargs.update(**kwargs)
        error_flag: bool = False
        sentences: List[Any] = []
        usages: List[Any] = []
        response: RecognitionResponse = None
        result: RecognitionResult = None

        try:
            # Pre-load the whole file into the stream queue in 12800-byte
            # chunks before launching the request.
            audio_data: bytes = None
            f = open(file, 'rb')
            if os.path.getsize(file):
                while True:
                    audio_data = f.read(12800)
                    if not audio_data:
                        break
                    else:
                        self._stream_data.put(audio_data)
            else:
                raise InputDataRequired(
                    'The supplied file was empty (zero bytes long)')
            f.close()
            self._stop_stream_timestamp = time.time() * 1000
        except Exception as e:
            logger.error(e)
            raise e

        if not self._stream_data.empty():
            self._running = True
            responses = self.__launch_request()
            for part in responses:
                if part.status_code == HTTPStatus.OK:
                    if 'sentence' in part.output:
                        if (self._first_package_timestamp < 0):
                            self._first_package_timestamp = time.time() * 1000
                            logger.debug('first package delay {}'.format(
                                self._first_package_timestamp -
                                self._start_stream_timestamp))
                        sentence = part.output['sentence']
                        logger.debug(
                            'Recv Result [rid:{}]:{}, isEnd: {}'.format(
                                part.request_id, sentence,
                                RecognitionResult.is_sentence_end(sentence)))
                        # Only completed sentences are collected for the
                        # final synchronous result.
                        if RecognitionResult.is_sentence_end(sentence):
                            sentences.append(sentence)

                            if part.usage is not None:
                                usage = {
                                    'end_time':
                                    part.output['sentence']['end_time'],
                                    'usage': part.usage
                                }
                                usages.append(usage)

                    response = RecognitionResponse.from_api_response(part)
                else:
                    response = RecognitionResponse.from_api_response(part)
                    logger.error(response)
                    error_flag = True
                    break

        self._on_complete_timestamp = time.time() * 1000
        logger.debug('last package delay {}'.format(
            self.get_last_package_delay()))

        if error_flag:
            result = RecognitionResult(response)
        else:
            result = RecognitionResult(response, sentences, usages)

        # Reset session state so the instance can be reused.
        self._stream_data = Queue()
        self._recognition_once = False
        self._running = False

        return result

    def stop(self):
        """End asynchronous speech recognition.

        Raises:
            InvalidParameter: Cannot stop an uninitiated recognition.
        """
        if self._running is False:
            raise InvalidParameter('Speech recognition has stopped.')

        self._stop_stream_timestamp = time.time() * 1000

        # Flipping _running lets _input_stream_cycle() drain and finish,
        # after which the worker thread ends.
        self._running = False
        if self._worker is not None and self._worker.is_alive():
            self._worker.join()
        self._stream_data = Queue()
        if self._silence_timer is not None and self._silence_timer.is_alive():
            self._silence_timer.cancel()
            self._silence_timer = None
        if self._callback:
            self._callback.on_close()

    def send_audio_frame(self, buffer: bytes):
        """Push speech recognition.

        Raises:
            InvalidParameter: Cannot send data to an uninitiated recognition.
        """
        if self._running is False:
            raise InvalidParameter('Speech recognition has stopped.')

        if (self._start_stream_timestamp < 0):
            self._start_stream_timestamp = time.time() * 1000
        logger.debug('send_audio_frame: {}'.format(len(buffer)))
        self._stream_data.put(buffer)

    def _tidy_kwargs(self):
        # Drop None-valued options so they are not sent to the service.
        for k in self._kwargs.copy():
            if self._kwargs[k] is None:
                self._kwargs.pop(k, None)

    def _input_stream_cycle(self):
        # Generator feeding audio frames to the websocket sender. Polls the
        # queue while the session is running and drains any remainder after
        # stop() flips _running.
        while self._running:
            while self._stream_data.empty():
                if self._running:
                    time.sleep(0.01)
                    continue
                else:
                    break

            # Reset silence_timer when getting stream.
            if self._silence_timer is not None and self._silence_timer.is_alive(  # noqa E501
            ):
                self._silence_timer.cancel()
                self._silence_timer = Timer(Recognition.SILENCE_TIMEOUT_S,
                                            self._silence_stop_timer)
                self._silence_timer.start()

            while not self._stream_data.empty():
                frame = self._stream_data.get()
                yield bytes(frame)

            # In one-shot (call()) mode all audio was pre-queued, so a
            # single pass is enough.
            if self._recognition_once:
                self._running = False

        # drain all audio data when invoking stop().
        if self._recognition_once is False:
            while not self._stream_data.empty():
                frame = self._stream_data.get()
                yield bytes(frame)

    def _silence_stop_timer(self):
        """If audio data is not received for a long time, exit worker.
        """
        self._running = False
        if self._silence_timer is not None and self._silence_timer.is_alive():
            self._silence_timer.cancel()
        self._silence_timer = None
        if self._worker is not None and self._worker.is_alive():
            self._worker.join()
        self._stream_data = Queue()

    def get_first_package_delay(self):
        """First Package Delay is the time between start sending audio and receive first words package
        """
        return self._first_package_timestamp - self._start_stream_timestamp

    def get_last_package_delay(self):
        """Last Package Delay is the time between stop sending audio and receive last words package
        """
        return self._on_complete_timestamp - self._stop_stream_timestamp

    # Get the taskId of the previous task.
    def get_last_request_id(self):
        return self.last_request_id
|
||||
@@ -0,0 +1,231 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import List, Union
|
||||
|
||||
import aiohttp
|
||||
|
||||
from dashscope.api_entities.dashscope_response import (DashScopeAPIResponse,
|
||||
TranscriptionResponse)
|
||||
from dashscope.client.base_api import BaseAsyncApi
|
||||
from dashscope.common.constants import ApiProtocol, HTTPMethod
|
||||
from dashscope.common.logging import logger
|
||||
from dashscope.common.utils import _get_task_group_and_task
|
||||
|
||||
|
||||
class Transcription(BaseAsyncApi):
|
||||
"""API for File Transcription models.
|
||||
"""
|
||||
|
||||
    # Maximum number of retries after transient HTTP/timeout failures in
    # fetch() and _launch_request().
    MAX_QUERY_TRY_COUNT = 3

    class Models:
        """Known file-transcription model identifiers."""
        paraformer_v1 = 'paraformer-v1'
        paraformer_8k_v1 = 'paraformer-8k-v1'
        paraformer_mtl_v1 = 'paraformer-mtl-v1'
|
||||
|
||||
@classmethod
|
||||
def call(cls,
|
||||
model: str,
|
||||
file_urls: List[str],
|
||||
phrase_id: str = None,
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Transcribe the given files synchronously.
|
||||
|
||||
Args:
|
||||
model (str): The requested model_id.
|
||||
file_urls (List[str]): List of stored URLs.
|
||||
phrase_id (str, `optional`): The ID of phrase.
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
**kwargs:
|
||||
channel_id (List[int], optional):
|
||||
The selected channel_id of audio file.
|
||||
disfluency_removal_enabled(bool, `optional`):
|
||||
Filter mood words, turned off by default.
|
||||
diarization_enabled (bool, `optional`):
|
||||
Speech auto diarization, turned off by default.
|
||||
speaker_count (int, `optional`): The number of speakers.
|
||||
timestamp_alignment_enabled (bool, `optional`):
|
||||
Timestamp-alignment calibration, turned off by default.
|
||||
special_word_filter(str, `optional`): Sensitive word filter.
|
||||
audio_event_detection_enabled(bool, `optional`):
|
||||
Audio event detection, turned off by default.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The result of batch transcription.
|
||||
"""
|
||||
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
|
||||
kwargs = cls._tidy_kwargs(**kwargs)
|
||||
response = super().call(model,
|
||||
file_urls,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
|
||||
def async_call(cls,
|
||||
model: str,
|
||||
file_urls: List[str],
|
||||
phrase_id: str = None,
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Transcribe the given files asynchronously,
|
||||
return the status of task submission for querying results subsequently.
|
||||
|
||||
Args:
|
||||
model (str): The requested model, such as paraformer-16k-1
|
||||
file_urls (List[str]): List of stored URLs.
|
||||
phrase_id (str, `optional`): The ID of phrase.
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
**kwargs:
|
||||
channel_id (List[int], optional):
|
||||
The selected channel_id of audio file.
|
||||
disfluency_removal_enabled(bool, `optional`):
|
||||
Filter mood words, turned off by default.
|
||||
diarization_enabled (bool, `optional`):
|
||||
Speech auto diarization, turned off by default.
|
||||
speaker_count (int, `optional`): The number of speakers.
|
||||
timestamp_alignment_enabled (bool, `optional`):
|
||||
Timestamp-alignment calibration, turned off by default.
|
||||
special_word_filter(str, `optional`): Sensitive word filter.
|
||||
audio_event_detection_enabled(bool, `optional`):
|
||||
Audio event detection, turned off by default.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The response including task_id.
|
||||
"""
|
||||
kwargs.update(cls._fill_resource_id(phrase_id, **kwargs))
|
||||
kwargs = cls._tidy_kwargs(**kwargs)
|
||||
response = cls._launch_request(model,
|
||||
file_urls,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
|
||||
def fetch(cls,
|
||||
task: Union[str, TranscriptionResponse],
|
||||
api_key: str = None,
|
||||
workspace: str = None,
|
||||
**kwargs) -> TranscriptionResponse:
|
||||
"""Fetch the status of task, including results of batch transcription when task_status is SUCCEEDED. # noqa: E501
|
||||
|
||||
Args:
|
||||
task (Union[str, TranscriptionResponse]): The task_id or
|
||||
response including task_id returned from async_call().
|
||||
workspace (str): The dashscope workspace id.
|
||||
|
||||
Returns:
|
||||
TranscriptionResponse: The status of task_id,
|
||||
including results of batch transcription when task_status is SUCCEEDED.
|
||||
"""
|
||||
try_count: int = 0
|
||||
while True:
|
||||
try:
|
||||
response = super().fetch(task,
|
||||
api_key=api_key,
|
||||
workspace=workspace,
|
||||
**kwargs)
|
||||
except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
|
||||
logger.error(e)
|
||||
try_count += 1
|
||||
if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
|
||||
time.sleep(2)
|
||||
continue
|
||||
|
||||
try_count = 0
|
||||
break
|
||||
|
||||
return TranscriptionResponse.from_api_response(response)
|
||||
|
||||
@classmethod
def wait(cls,
         task: Union[str, TranscriptionResponse],
         api_key: str = None,
         workspace: str = None,
         **kwargs) -> TranscriptionResponse:
    """Block until the transcription task reaches its final state.

    Args:
        task (Union[str, TranscriptionResponse]): The task_id or
            response including task_id returned from async_call().
        api_key (str, `optional`): The DashScope api key.
        workspace (str): The dashscope workspace id.

    Returns:
        TranscriptionResponse: The result of batch transcription.
    """
    raw_response = super().wait(
        task, api_key=api_key, workspace=workspace, **kwargs)
    return TranscriptionResponse.from_api_response(raw_response)
|
||||
|
||||
@classmethod
def _launch_request(cls,
                    model: str,
                    files: List[str],
                    api_key: str = None,
                    workspace: str = None,
                    **kwargs) -> DashScopeAPIResponse:
    """Submit the transcription request, retrying transient network errors.

    Args:
        model (str): The requested model, such as paraformer-16k-1.
        files (List[str]): List of stored URLs.
        api_key (str, `optional`): The DashScope api key.
        workspace (str): The dashscope workspace id.

    Returns:
        DashScopeAPIResponse: The result of task submission.

    Raises:
        asyncio.TimeoutError, aiohttp.ClientConnectorError: If the
            submission still fails after MAX_QUERY_TRY_COUNT retries.
    """
    task_name, function = _get_task_group_and_task(__name__)

    try_count: int = 0
    while True:
        try:
            response = super().async_call(model=model,
                                          task_group='audio',
                                          task=task_name,
                                          function=function,
                                          input={'file_urls': files},
                                          api_protocol=ApiProtocol.HTTP,
                                          http_method=HTTPMethod.POST,
                                          api_key=api_key,
                                          workspace=workspace,
                                          **kwargs)
        except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
            logger.error(e)
            try_count += 1
            if try_count <= Transcription.MAX_QUERY_TRY_COUNT:
                time.sleep(2)
                continue
            # Bug fix: the original broke out of the loop here with
            # `response` unbound, so the `return` below raised NameError.
            # Propagate the last network error instead.
            raise
        break

    return response
|
||||
|
||||
@classmethod
def _fill_resource_id(cls, phrase_id: str, **kwargs):
    """Attach the asr phrase resource to kwargs when a phrase_id is given.

    When `phrase_id` is a non-empty string, kwargs['resources'] is set to
    a single-item list describing it; otherwise kwargs is returned
    untouched.
    """
    if phrase_id:
        kwargs['resources'] = [{
            'resource_id': phrase_id,
            'resource_type': 'asr_phrase',
        }]
    return kwargs
|
||||
|
||||
@classmethod
def _tidy_kwargs(cls, **kwargs):
    """Return kwargs with every None-valued entry dropped."""
    return {key: value for key, value in kwargs.items() if value is not None}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,177 @@
|
||||
# Copyright (c) Alibaba, Inc. and its affiliates.
|
||||
|
||||
import asyncio
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import aiohttp
|
||||
|
||||
from dashscope.client.base_api import BaseApi
|
||||
from dashscope.common.constants import ApiProtocol, HTTPMethod
|
||||
from dashscope.common.logging import logger
|
||||
|
||||
|
||||
class VocabularyServiceException(Exception):
    """Raised when the vocabulary service returns an error response."""

    def __init__(self, request_id: str, status_code: int, code: str,
                 error_message: str) -> None:
        # Kept as private attributes; __str__ renders them for display.
        self._request_id = request_id
        self._status_code = status_code
        self._code = code
        self._error_message = error_message

    def __str__(self):
        details = [
            f'Request: {self._request_id}',
            f'Status Code: {self._status_code}',
            f'Code: {self._code}',
            f'Error Message: {self._error_message}',
        ]
        return ', '.join(details)
|
||||
|
||||
|
||||
class VocabularyService(BaseApi):
    """API for the asr vocabulary (hot-word) customization service."""

    # Maximum number of retries for transient network failures.
    MAX_QUERY_TRY_COUNT = 3

    def __init__(self,
                 api_key=None,
                 workspace=None,
                 model=None,
                 **kwargs) -> None:
        """
        Args:
            api_key (str, `optional`): The DashScope api key.
            workspace (str, `optional`): The DashScope workspace id.
            model (str, `optional`): The customization model name;
                defaults to 'speech-biasing'.
        """
        super().__init__()
        self._api_key = api_key
        self._workspace = workspace
        self._kwargs = kwargs
        self._last_request_id = None
        self.model = model if model is not None else 'speech-biasing'

    def __call_with_input(self, payload):
        """POST `payload` to the customization endpoint.

        Transient network failures are retried up to MAX_QUERY_TRY_COUNT
        times with a 2s pause between tries.

        Raises:
            asyncio.TimeoutError, aiohttp.ClientConnectorError: If the
                request still fails after all retries.
        """
        try_count = 0
        while True:
            try:
                response = super().call(model=self.model,
                                        task_group='audio',
                                        task='asr',
                                        function='customization',
                                        input=payload,
                                        api_protocol=ApiProtocol.HTTP,
                                        http_method=HTTPMethod.POST,
                                        api_key=self._api_key,
                                        workspace=self._workspace,
                                        **self._kwargs)
            except (asyncio.TimeoutError, aiohttp.ClientConnectorError) as e:
                logger.error(e)
                try_count += 1
                if try_count <= VocabularyService.MAX_QUERY_TRY_COUNT:
                    time.sleep(2)
                    continue
                # Bug fix: the original broke out of the loop here with
                # `response` unbound (NameError below); re-raise instead.
                raise
            break
        # Bug fix: lazy %-style logging needs a placeholder — the old
        # `logger.debug('>>>>recv', response)` raised a formatting error.
        logger.debug('>>>>recv %s', response)
        return response

    def _unwrap(self, response):
        """Record the request id and return response.output on success,
        otherwise raise VocabularyServiceException."""
        if response.status_code == 200:
            self._last_request_id = response.request_id
            return response.output
        raise VocabularyServiceException(response.request_id,
                                         response.status_code,
                                         response.code, response.message)

    def create_vocabulary(self, target_model: str, prefix: str,
                          vocabulary: List[dict]) -> str:
        """Create a vocabulary (hot-word list).

        Args:
            target_model: The speech recognition model version the
                vocabulary applies to.
            prefix: Custom vocabulary prefix; digits and lowercase
                letters only, fewer than ten characters.
            vocabulary: The vocabulary entries.

        Returns:
            The vocabulary identifier (vocabulary_id).

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'create_vocabulary',
            'target_model': target_model,
            'prefix': prefix,
            'vocabulary': vocabulary,
        })
        return self._unwrap(response)['vocabulary_id']

    def list_vocabularies(self,
                          prefix=None,
                          page_index: int = 0,
                          page_size: int = 10) -> List[dict]:
        """List all created vocabularies.

        Args:
            prefix: Optional custom prefix; when set, only identifiers
                with this prefix are returned.
            page_index: Page index to query.
            page_size: Page size to query.

        Returns:
            The list of vocabulary identifiers.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        payload = {
            'action': 'list_vocabulary',
            'page_index': page_index,
            'page_size': page_size,
        }
        if prefix:
            payload['prefix'] = prefix
        response = self.__call_with_input(payload)
        return self._unwrap(response)['vocabulary_list']

    def query_vocabulary(self, vocabulary_id: str) -> List[dict]:
        """Fetch the content of a vocabulary.

        Args:
            vocabulary_id: The vocabulary identifier.

        Returns:
            The vocabulary content.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'query_vocabulary',
            'vocabulary_id': vocabulary_id,
        })
        return self._unwrap(response)

    def update_vocabulary(self, vocabulary_id: str,
                          vocabulary: List[dict]) -> None:
        """Replace an existing vocabulary with a new one.

        Args:
            vocabulary_id: Identifier of the vocabulary to replace.
            vocabulary: The new vocabulary entries.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'update_vocabulary',
            'vocabulary_id': vocabulary_id,
            'vocabulary': vocabulary,
        })
        self._unwrap(response)

    def delete_vocabulary(self, vocabulary_id: str) -> None:
        """Delete a vocabulary.

        Args:
            vocabulary_id: Identifier of the vocabulary to delete.

        Raises:
            VocabularyServiceException: On a non-200 service response.
        """
        response = self.__call_with_input({
            'action': 'delete_vocabulary',
            'vocabulary_id': vocabulary_id,
        })
        self._unwrap(response)

    def get_last_request_id(self):
        """Return the request id of the last successful service call."""
        return self._last_request_id
|
||||
Reference in New Issue
Block a user