增加环绕侦察场景适配

This commit is contained in:
2026-01-08 15:44:38 +08:00
parent 3eba1f962b
commit 10c5bb5a8a
5441 changed files with 40219 additions and 379695 deletions

View File

@@ -187,7 +187,7 @@ class BaseTokenizer:
Returns:
The normalized string
"""
-return self._tokenizer.normalize(sequence)
+return self._tokenizer.normalizer.normalize_str(sequence)
def encode(
self,

View File

@@ -150,7 +150,7 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
sys.path.append(".")
-import sentencepiece_model_pb2 as model
+import sentencepiece_model_pb2 as model # type: ignore[import]
except Exception:
raise Exception(
"You don't seem to have the required protobuf file, in order to use this function you need to run `pip install protobuf` and `wget https://raw.githubusercontent.com/google/sentencepiece/master/python/src/sentencepiece/sentencepiece_model_pb2.py` for us to be able to read the intrinsics of your spm_file. `pip install sentencepiece` is not required."
@@ -191,6 +191,6 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
"model": "SentencePieceUnigram",
}
-obj = BaseTokenizer.__new__(SentencePieceUnigramTokenizer, tokenizer, parameters)
+obj = BaseTokenizer.__new__(SentencePieceUnigramTokenizer, tokenizer, parameters) # type: ignore[arg-type]
BaseTokenizer.__init__(obj, tokenizer, parameters)
return obj