chore: 添加虚拟环境到仓库
- 添加 backend_service/venv 虚拟环境
- 包含所有 Python 依赖包
- 注意:虚拟环境约 393MB,包含 12655 个文件
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
# Generated content DO NOT EDIT
|
||||
from .. import trainers
|
||||
|
||||
Trainer = trainers.Trainer
|
||||
BpeTrainer = trainers.BpeTrainer
|
||||
UnigramTrainer = trainers.UnigramTrainer
|
||||
WordLevelTrainer = trainers.WordLevelTrainer
|
||||
WordPieceTrainer = trainers.WordPieceTrainer
|
||||
@@ -0,0 +1,173 @@
|
||||
# Generated content DO NOT EDIT
|
||||
class Trainer:
|
||||
"""
|
||||
Base class for all trainers
|
||||
|
||||
This class is not supposed to be instantiated directly. Instead, any implementation of a
|
||||
Trainer will return an instance of this class when instantiated.
|
||||
"""
|
||||
|
||||
class BpeTrainer(Trainer):
|
||||
"""
|
||||
Trainer capable of training a BPE model
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`, `optional`):
|
||||
The size of the final vocabulary, including all tokens and alphabet.
|
||||
|
||||
min_frequency (:obj:`int`, `optional`):
|
||||
The minimum frequency a pair should have in order to be merged.
|
||||
|
||||
show_progress (:obj:`bool`, `optional`):
|
||||
Whether to show progress bars while training.
|
||||
|
||||
special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
|
||||
A list of special tokens the model should know of.
|
||||
|
||||
limit_alphabet (:obj:`int`, `optional`):
|
||||
The maximum different characters to keep in the alphabet.
|
||||
|
||||
initial_alphabet (:obj:`List[str]`, `optional`):
|
||||
A list of characters to include in the initial alphabet, even
|
||||
if not seen in the training dataset.
|
||||
If the strings contain more than one character, only the first one
|
||||
is kept.
|
||||
|
||||
continuing_subword_prefix (:obj:`str`, `optional`):
|
||||
A prefix to be used for every subword that is not a beginning-of-word.
|
||||
|
||||
end_of_word_suffix (:obj:`str`, `optional`):
|
||||
A suffix to be used for every subword that is a end-of-word.
|
||||
|
||||
max_token_length (:obj:`int`, `optional`):
|
||||
Prevents creating tokens longer than the specified size.
|
||||
This can help with reducing polluting your vocabulary with
|
||||
highly repetitive tokens like `======` for wikipedia
|
||||
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
vocab_size=30000,
|
||||
min_frequency=0,
|
||||
show_progress=True,
|
||||
special_tokens=[],
|
||||
limit_alphabet=None,
|
||||
initial_alphabet=[],
|
||||
continuing_subword_prefix=None,
|
||||
end_of_word_suffix=None,
|
||||
max_token_length=None,
|
||||
words={},
|
||||
):
|
||||
pass
|
||||
|
||||
class UnigramTrainer(Trainer):
|
||||
"""
|
||||
Trainer capable of training a Unigram model
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`):
|
||||
The size of the final vocabulary, including all tokens and alphabet.
|
||||
|
||||
show_progress (:obj:`bool`):
|
||||
Whether to show progress bars while training.
|
||||
|
||||
special_tokens (:obj:`List[Union[str, AddedToken]]`):
|
||||
A list of special tokens the model should know of.
|
||||
|
||||
initial_alphabet (:obj:`List[str]`):
|
||||
A list of characters to include in the initial alphabet, even
|
||||
if not seen in the training dataset.
|
||||
If the strings contain more than one character, only the first one
|
||||
is kept.
|
||||
|
||||
shrinking_factor (:obj:`float`):
|
||||
The shrinking factor used at each step of the training to prune the
|
||||
vocabulary.
|
||||
|
||||
unk_token (:obj:`str`):
|
||||
The token used for out-of-vocabulary tokens.
|
||||
|
||||
max_piece_length (:obj:`int`):
|
||||
The maximum length of a given token.
|
||||
|
||||
n_sub_iterations (:obj:`int`):
|
||||
The number of iterations of the EM algorithm to perform before
|
||||
pruning the vocabulary.
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
vocab_size=8000,
|
||||
show_progress=True,
|
||||
special_tokens=[],
|
||||
initial_alphabet=[],
|
||||
shrinking_factor=0.75,
|
||||
unk_token=None,
|
||||
max_piece_length=16,
|
||||
n_sub_iterations=2,
|
||||
):
|
||||
pass
|
||||
|
||||
class WordLevelTrainer(Trainer):
|
||||
"""
|
||||
Trainer capable of training a WorldLevel model
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`, `optional`):
|
||||
The size of the final vocabulary, including all tokens and alphabet.
|
||||
|
||||
min_frequency (:obj:`int`, `optional`):
|
||||
The minimum frequency a pair should have in order to be merged.
|
||||
|
||||
show_progress (:obj:`bool`, `optional`):
|
||||
Whether to show progress bars while training.
|
||||
|
||||
special_tokens (:obj:`List[Union[str, AddedToken]]`):
|
||||
A list of special tokens the model should know of.
|
||||
"""
|
||||
def __init__(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[]):
|
||||
pass
|
||||
|
||||
class WordPieceTrainer(Trainer):
|
||||
"""
|
||||
Trainer capable of training a WordPiece model
|
||||
|
||||
Args:
|
||||
vocab_size (:obj:`int`, `optional`):
|
||||
The size of the final vocabulary, including all tokens and alphabet.
|
||||
|
||||
min_frequency (:obj:`int`, `optional`):
|
||||
The minimum frequency a pair should have in order to be merged.
|
||||
|
||||
show_progress (:obj:`bool`, `optional`):
|
||||
Whether to show progress bars while training.
|
||||
|
||||
special_tokens (:obj:`List[Union[str, AddedToken]]`, `optional`):
|
||||
A list of special tokens the model should know of.
|
||||
|
||||
limit_alphabet (:obj:`int`, `optional`):
|
||||
The maximum different characters to keep in the alphabet.
|
||||
|
||||
initial_alphabet (:obj:`List[str]`, `optional`):
|
||||
A list of characters to include in the initial alphabet, even
|
||||
if not seen in the training dataset.
|
||||
If the strings contain more than one character, only the first one
|
||||
is kept.
|
||||
|
||||
continuing_subword_prefix (:obj:`str`, `optional`):
|
||||
A prefix to be used for every subword that is not a beginning-of-word.
|
||||
|
||||
end_of_word_suffix (:obj:`str`, `optional`):
|
||||
A suffix to be used for every subword that is a end-of-word.
|
||||
"""
|
||||
def __init__(
|
||||
self,
|
||||
vocab_size=30000,
|
||||
min_frequency=0,
|
||||
show_progress=True,
|
||||
special_tokens=[],
|
||||
limit_alphabet=None,
|
||||
initial_alphabet=[],
|
||||
continuing_subword_prefix="##",
|
||||
end_of_word_suffix=None,
|
||||
):
|
||||
pass
|
||||
Reference in New Issue
Block a user